• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23 
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28 
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44 
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52 
53 namespace android {
54 namespace snapshot {
55 
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85 
// Location of the snapshot boot indicator file.
// NOTE(review): presumably the default behind GetSnapshotBootIndicatorPath() - confirm.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
// Location of the rollback indicator file.
// NOTE(review): presumably the default behind GetRollbackIndicatorPath() - confirm.
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// Two-second interval.
// NOTE(review): presumably the polling period used while waiting on the update
// state; its use is outside this section - confirm.
static constexpr auto kUpdateStateCheckInterval = 2s;

// Defined elsewhere; the SnapshotManager constructor installs this function as
// the initial merge_consistency_checker_.
MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status);
91 
92 // Note: IImageManager is an incomplete type in the header, so the default
93 // destructor doesn't work.
~SnapshotManager()94 SnapshotManager::~SnapshotManager() {}
95 
New(IDeviceInfo * info)96 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
97     if (!info) {
98         info = new DeviceInfo();
99     }
100 
101     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
102 }
103 
NewForFirstStageMount(IDeviceInfo * info)104 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
105     if (!info) {
106         DeviceInfo* impl = new DeviceInfo();
107         impl->set_first_stage_init(true);
108         info = impl;
109     }
110     auto sm = New(info);
111 
112     // The first-stage version of snapuserd is explicitly started by init. Do
113     // not attempt to using it during tests (which run in normal AOSP).
114     if (!sm->device()->IsTestDevice()) {
115         sm->use_first_stage_snapuserd_ = true;
116     }
117     return sm;
118 }
119 
SnapshotManager(IDeviceInfo * device)120 SnapshotManager::SnapshotManager(IDeviceInfo* device)
121     : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {
122     merge_consistency_checker_ = android::snapshot::CheckMergeConsistency;
123 }
124 
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
128 
GetSnapshotDriver(LockedFile * lock)129 SnapshotManager::SnapshotDriver SnapshotManager::GetSnapshotDriver(LockedFile* lock) {
130     if (UpdateUsesUserSnapshots(lock)) {
131         return SnapshotManager::SnapshotDriver::DM_USER;
132     } else {
133         return SnapshotManager::SnapshotDriver::DM_SNAPSHOT;
134     }
135 }
136 
GetDmUserCowName(const std::string & snapshot_name,SnapshotManager::SnapshotDriver driver)137 static std::string GetDmUserCowName(const std::string& snapshot_name,
138                                     SnapshotManager::SnapshotDriver driver) {
139     // dm-user block device will act as a snapshot device. We identify it with
140     // the same partition name so that when partitions can be mounted off
141     // dm-user.
142 
143     switch (driver) {
144         case SnapshotManager::SnapshotDriver::DM_USER: {
145             return snapshot_name;
146         }
147 
148         case SnapshotManager::SnapshotDriver::DM_SNAPSHOT: {
149             return snapshot_name + "-user-cow";
150         }
151 
152         default: {
153             LOG(ERROR) << "Invalid snapshot driver";
154             return "";
155         }
156     }
157 }
158 
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
162 
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
166 
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
170 
BeginUpdate()171 bool SnapshotManager::BeginUpdate() {
172     bool needs_merge = false;
173     if (!TryCancelUpdate(&needs_merge)) {
174         return false;
175     }
176     if (needs_merge) {
177         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
178         auto state = ProcessUpdateState();
179         LOG(INFO) << "Merged with state = " << state;
180     }
181 
182     auto file = LockExclusive();
183     if (!file) return false;
184 
185     // Purge the ImageManager just in case there is a corrupt lp_metadata file
186     // lying around. (NB: no need to return false on an error, we can let the
187     // update try to progress.)
188     if (EnsureImageManager()) {
189         images_->RemoveAllImages();
190     }
191 
192     // Clear any cached metadata (this allows re-using one manager across tests).
193     old_partition_metadata_ = nullptr;
194 
195     auto state = ReadUpdateState(file.get());
196     if (state != UpdateState::None) {
197         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
198         return false;
199     }
200     return WriteUpdateState(file.get(), UpdateState::Initiated);
201 }
202 
CancelUpdate()203 bool SnapshotManager::CancelUpdate() {
204     bool needs_merge = false;
205     if (!TryCancelUpdate(&needs_merge)) {
206         return false;
207     }
208     if (needs_merge) {
209         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
210     }
211     return !needs_merge;
212 }
213 
TryCancelUpdate(bool * needs_merge)214 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
215     *needs_merge = false;
216 
217     auto file = LockExclusive();
218     if (!file) return false;
219 
220     UpdateState state = ReadUpdateState(file.get());
221     if (state == UpdateState::None) {
222         RemoveInvalidSnapshots(file.get());
223         return true;
224     }
225 
226     if (state == UpdateState::Initiated) {
227         LOG(INFO) << "Update has been initiated, now canceling";
228         return RemoveAllUpdateState(file.get());
229     }
230 
231     if (state == UpdateState::Unverified) {
232         // We completed an update, but it can still be canceled if we haven't booted into it.
233         auto slot = GetCurrentSlot();
234         if (slot != Slot::Target) {
235             LOG(INFO) << "Canceling previously completed updates (if any)";
236             return RemoveAllUpdateState(file.get());
237         }
238     }
239     *needs_merge = true;
240     return true;
241 }
242 
ReadUpdateSourceSlotSuffix()243 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
244     auto boot_file = GetSnapshotBootIndicatorPath();
245     std::string contents;
246     if (!android::base::ReadFileToString(boot_file, &contents)) {
247         PLOG(WARNING) << "Cannot read " << boot_file;
248         return {};
249     }
250     return contents;
251 }
252 
GetCurrentSlot()253 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
254     auto contents = ReadUpdateSourceSlotSuffix();
255     if (contents.empty()) {
256         return Slot::Unknown;
257     }
258     if (device_->GetSlotSuffix() == contents) {
259         return Slot::Source;
260     }
261     return Slot::Target;
262 }
263 
GetSnapshotSlotSuffix()264 std::string SnapshotManager::GetSnapshotSlotSuffix() {
265     switch (GetCurrentSlot()) {
266         case Slot::Target:
267             return device_->GetSlotSuffix();
268         default:
269             return device_->GetOtherSlotSuffix();
270     }
271 }
272 
RemoveFileIfExists(const std::string & path)273 static bool RemoveFileIfExists(const std::string& path) {
274     std::string message;
275     if (!android::base::RemoveFileIfExists(path, &message)) {
276         LOG(ERROR) << "Remove failed: " << path << ": " << message;
277         return false;
278     }
279     return true;
280 }
281 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)282 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
283     if (prolog && !prolog()) {
284         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
285         return false;
286     }
287 
288     LOG(INFO) << "Removing all update state.";
289 
290     if (!RemoveAllSnapshots(lock)) {
291         LOG(ERROR) << "Could not remove all snapshots";
292         return false;
293     }
294 
295     // It's okay if these fail:
296     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
297     // reading the indicator file, so it's not a problem if it still exists
298     // after the update completes.
299     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
300     // matches the incoming update.
301     std::vector<std::string> files = {
302             GetSnapshotBootIndicatorPath(),
303             GetRollbackIndicatorPath(),
304             GetForwardMergeIndicatorPath(),
305             GetOldPartitionMetadataPath(),
306     };
307     for (const auto& file : files) {
308         RemoveFileIfExists(file);
309     }
310 
311     // If this fails, we'll keep trying to remove the update state (as the
312     // device reboots or starts a new update) until it finally succeeds.
313     return WriteUpdateState(lock, UpdateState::None);
314 }
315 
FinishedSnapshotWrites(bool wipe)316 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
317     auto lock = LockExclusive();
318     if (!lock) return false;
319 
320     auto update_state = ReadUpdateState(lock.get());
321     if (update_state == UpdateState::Unverified) {
322         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
323         return true;
324     }
325 
326     if (update_state != UpdateState::Initiated) {
327         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
328         return false;
329     }
330 
331     if (!EnsureNoOverflowSnapshot(lock.get())) {
332         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
333         return false;
334     }
335 
336     if (!UpdateForwardMergeIndicator(wipe)) {
337         return false;
338     }
339 
340     // This file is written on boot to detect whether a rollback occurred. It
341     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
342     // snapshots too early.
343     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
344         return false;
345     }
346 
347     // This file acts as both a quick indicator for init (it can use access(2)
348     // to decide how to do first-stage mounts), and it stores the old slot, so
349     // we can tell whether or not we performed a rollback.
350     auto contents = device_->GetSlotSuffix();
351     auto boot_file = GetSnapshotBootIndicatorPath();
352     if (!WriteStringToFileAtomic(contents, boot_file)) {
353         PLOG(ERROR) << "write failed: " << boot_file;
354         return false;
355     }
356     return WriteUpdateState(lock.get(), UpdateState::Unverified);
357 }
358 
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)359 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
360                                      SnapshotStatus* status) {
361     CHECK(lock);
362     CHECK(lock->lock_mode() == LOCK_EX);
363     CHECK(status);
364 
365     if (status->name().empty()) {
366         LOG(ERROR) << "SnapshotStatus has no name.";
367         return false;
368     }
369     // Check these sizes. Like liblp, we guarantee the partition size is
370     // respected, which means it has to be sector-aligned. (This guarantee is
371     // useful for locating avb footers correctly). The COW file size, however,
372     // can be arbitrarily larger than specified, so we can safely round it up.
373     if (status->device_size() % kSectorSize != 0) {
374         LOG(ERROR) << "Snapshot " << status->name()
375                    << " device size is not a multiple of the sector size: "
376                    << status->device_size();
377         return false;
378     }
379     if (status->snapshot_size() % kSectorSize != 0) {
380         LOG(ERROR) << "Snapshot " << status->name()
381                    << " snapshot size is not a multiple of the sector size: "
382                    << status->snapshot_size();
383         return false;
384     }
385     if (status->cow_partition_size() % kSectorSize != 0) {
386         LOG(ERROR) << "Snapshot " << status->name()
387                    << " cow partition size is not a multiple of the sector size: "
388                    << status->cow_partition_size();
389         return false;
390     }
391     if (status->cow_file_size() % kSectorSize != 0) {
392         LOG(ERROR) << "Snapshot " << status->name()
393                    << " cow file size is not a multiple of the sector size: "
394                    << status->cow_file_size();
395         return false;
396     }
397 
398     status->set_state(SnapshotState::CREATED);
399     status->set_sectors_allocated(0);
400     status->set_metadata_sectors(0);
401     status->set_compression_enabled(cow_creator->compression_enabled);
402     status->set_compression_algorithm(cow_creator->compression_algorithm);
403 
404     if (!WriteSnapshotStatus(lock, *status)) {
405         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
406         return false;
407     }
408     return true;
409 }
410 
CreateCowImage(LockedFile * lock,const std::string & name)411 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
412     CHECK(lock);
413     CHECK(lock->lock_mode() == LOCK_EX);
414     if (!EnsureImageManager()) return Return::Error();
415 
416     SnapshotStatus status;
417     if (!ReadSnapshotStatus(lock, name, &status)) {
418         return Return::Error();
419     }
420 
421     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
422     if (status.cow_file_size() % kSectorSize != 0) {
423         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
424                    << status.cow_file_size();
425         return Return::Error();
426     }
427 
428     std::string cow_image_name = GetCowImageDeviceName(name);
429     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
430     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
431 }
432 
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::string & base_path_merge,const std::chrono::milliseconds & timeout_ms,std::string * path)433 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
434                                    const std::string& cow_file, const std::string& base_device,
435                                    const std::string& base_path_merge,
436                                    const std::chrono::milliseconds& timeout_ms, std::string* path) {
437     CHECK(lock);
438 
439     if (UpdateUsesUserSnapshots(lock)) {
440         SnapshotStatus status;
441         if (!ReadSnapshotStatus(lock, name, &status)) {
442             LOG(ERROR) << "MapDmUserCow: ReadSnapshotStatus failed...";
443             return false;
444         }
445 
446         if (status.state() == SnapshotState::NONE ||
447             status.state() == SnapshotState::MERGE_COMPLETED) {
448             LOG(ERROR) << "Should not create a snapshot device for " << name
449                        << " after merging has completed.";
450             return false;
451         }
452 
453         SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
454         if (update_status.state() == UpdateState::MergeCompleted ||
455             update_status.state() == UpdateState::MergeNeedsReboot) {
456             LOG(ERROR) << "Should not create a snapshot device for " << name
457                        << " after global merging has completed.";
458             return false;
459         }
460     }
461 
462     // Use an extra decoration for first-stage init, so we can transition
463     // to a new table entry in second-stage.
464     std::string misc_name = name;
465     if (use_first_stage_snapuserd_) {
466         misc_name += "-init";
467     }
468 
469     if (!EnsureSnapuserdConnected()) {
470         return false;
471     }
472 
473     uint64_t base_sectors = 0;
474     if (!UpdateUsesUserSnapshots(lock)) {
475         base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
476         if (base_sectors == 0) {
477             LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
478             return false;
479         }
480     } else {
481         // For userspace snapshots, the size of the base device is taken as the
482         // size of the dm-user block device. Since there is no pseudo mapping
483         // created in the daemon, we no longer need to rely on the daemon for
484         // sizing the dm-user block device.
485         unique_fd fd(TEMP_FAILURE_RETRY(open(base_path_merge.c_str(), O_RDONLY | O_CLOEXEC)));
486         if (fd < 0) {
487             LOG(ERROR) << "Cannot open block device: " << base_path_merge;
488             return false;
489         }
490 
491         uint64_t dev_sz = get_block_device_size(fd.get());
492         if (!dev_sz) {
493             LOG(ERROR) << "Failed to find block device size: " << base_path_merge;
494             return false;
495         }
496 
497         base_sectors = dev_sz >> 9;
498     }
499 
500     DmTable table;
501     table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
502     if (!dm_.CreateDevice(name, table, path, timeout_ms)) {
503         LOG(ERROR) << " dm-user: CreateDevice failed... ";
504         return false;
505     }
506     if (!WaitForDevice(*path, timeout_ms)) {
507         LOG(ERROR) << " dm-user: timeout: Failed to create block device for: " << name;
508         return false;
509     }
510 
511     auto control_device = "/dev/dm-user/" + misc_name;
512     if (!WaitForDevice(control_device, timeout_ms)) {
513         return false;
514     }
515 
516     if (UpdateUsesUserSnapshots(lock)) {
517         // Now that the dm-user device is created, initialize the daemon and
518         // spin up the worker threads.
519         if (!snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device, base_path_merge)) {
520             LOG(ERROR) << "InitDmUserCow failed";
521             return false;
522         }
523     }
524 
525     return snapuserd_client_->AttachDmUser(misc_name);
526 }
527 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)528 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
529                                   const std::string& base_device, const std::string& cow_device,
530                                   const std::chrono::milliseconds& timeout_ms,
531                                   std::string* dev_path) {
532     CHECK(lock);
533 
534     SnapshotStatus status;
535     if (!ReadSnapshotStatus(lock, name, &status)) {
536         return false;
537     }
538     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
539         LOG(ERROR) << "Should not create a snapshot device for " << name
540                    << " after merging has completed.";
541         return false;
542     }
543 
544     // Validate the block device size, as well as the requested snapshot size.
545     // Note that during first-stage init, we don't have the device paths.
546     if (android::base::StartsWith(base_device, "/")) {
547         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
548         if (fd < 0) {
549             PLOG(ERROR) << "open failed: " << base_device;
550             return false;
551         }
552         auto dev_size = get_block_device_size(fd);
553         if (!dev_size) {
554             PLOG(ERROR) << "Could not determine block device size: " << base_device;
555             return false;
556         }
557         if (status.device_size() != dev_size) {
558             LOG(ERROR) << "Block device size for " << base_device << " does not match"
559                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
560             return false;
561         }
562     }
563     if (status.device_size() % kSectorSize != 0) {
564         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
565         return false;
566     }
567     if (status.snapshot_size() % kSectorSize != 0 ||
568         status.snapshot_size() > status.device_size()) {
569         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
570         return false;
571     }
572     if (status.device_size() != status.snapshot_size()) {
573         LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
574                    << status.device_size() << ", snapshot size = " << status.snapshot_size();
575         return false;
576     }
577 
578     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
579 
580     // Note that merging is a global state. We do track whether individual devices
581     // have completed merging, but the start of the merge process is considered
582     // atomic.
583     SnapshotStorageMode mode;
584     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
585     switch (update_status.state()) {
586         case UpdateState::MergeCompleted:
587         case UpdateState::MergeNeedsReboot:
588             LOG(ERROR) << "Should not create a snapshot device for " << name
589                        << " after global merging has completed.";
590             return false;
591         case UpdateState::Merging:
592         case UpdateState::MergeFailed:
593             // Note: MergeFailed indicates that a merge is in progress, but
594             // is possibly stalled. We still have to honor the merge.
595             if (DecideMergePhase(status) == update_status.merge_phase()) {
596                 mode = SnapshotStorageMode::Merge;
597             } else {
598                 mode = SnapshotStorageMode::Persistent;
599             }
600             break;
601         default:
602             mode = SnapshotStorageMode::Persistent;
603             break;
604     }
605 
606     if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
607         LOG(ERROR) << "Snapshot: " << name
608                    << " has snapshot status Merging but mode set to Persistent."
609                    << " Changing mode to Snapshot-Merge.";
610         mode = SnapshotStorageMode::Merge;
611     }
612 
613     DmTable table;
614     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
615                                     kSnapshotChunkSize);
616     if (!dm_.CreateDevice(name, table, dev_path, timeout_ms)) {
617         LOG(ERROR) << "Could not create snapshot device: " << name;
618         return false;
619     }
620     return true;
621 }
622 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)623 std::optional<std::string> SnapshotManager::MapCowImage(
624         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
625     if (!EnsureImageManager()) return std::nullopt;
626     auto cow_image_name = GetCowImageDeviceName(name);
627 
628     bool ok;
629     std::string cow_dev;
630     if (device_->IsRecovery() || device_->IsFirstStageInit()) {
631         const auto& opener = device_->GetPartitionOpener();
632         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
633     } else {
634         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
635     }
636 
637     if (ok) {
638         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
639         return cow_dev;
640     }
641     LOG(ERROR) << "Could not map image device: " << cow_image_name;
642     return std::nullopt;
643 }
644 
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)645 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
646                                       const std::chrono::milliseconds& timeout_ms,
647                                       std::string* path) {
648     CHECK(lock);
649 
650     auto metadata = ReadOldPartitionMetadata(lock);
651     if (!metadata) {
652         LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
653         return false;
654     }
655 
656     auto old_name = GetOtherPartitionName(name);
657     auto slot_suffix = device_->GetSlotSuffix();
658     auto slot = SlotNumberForSlotSuffix(slot_suffix);
659 
660     CreateLogicalPartitionParams params = {
661             .block_device = device_->GetSuperDevice(slot),
662             .metadata = metadata,
663             .partition_name = old_name,
664             .timeout_ms = timeout_ms,
665             .device_name = GetSourceDeviceName(name),
666             .partition_opener = &device_->GetPartitionOpener(),
667     };
668     if (!CreateLogicalPartition(std::move(params), path)) {
669         LOG(ERROR) << "Could not create source device for snapshot " << name;
670         return false;
671     }
672     return true;
673 }
674 
UnmapSnapshot(LockedFile * lock,const std::string & name)675 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
676     CHECK(lock);
677 
678     if (UpdateUsesUserSnapshots(lock)) {
679         if (!UnmapUserspaceSnapshotDevice(lock, name)) {
680             return false;
681         }
682     } else {
683         if (!DeleteDeviceIfExists(name)) {
684             LOG(ERROR) << "Could not delete snapshot device: " << name;
685             return false;
686         }
687     }
688     return true;
689 }
690 
UnmapCowImage(const std::string & name)691 bool SnapshotManager::UnmapCowImage(const std::string& name) {
692     if (!EnsureImageManager()) return false;
693     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
694 }
695 
DeleteSnapshot(LockedFile * lock,const std::string & name)696 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
697     CHECK(lock);
698     CHECK(lock->lock_mode() == LOCK_EX);
699     if (!EnsureImageManager()) return false;
700 
701     if (!UnmapCowDevices(lock, name)) {
702         return false;
703     }
704 
705     // We can't delete snapshots in recovery. The only way we'd try is it we're
706     // completing or canceling a merge in preparation for a data wipe, in which
707     // case, we don't care if the file sticks around.
708     if (device_->IsRecovery()) {
709         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
710         return true;
711     }
712 
713     auto cow_image_name = GetCowImageDeviceName(name);
714     if (images_->BackingImageExists(cow_image_name)) {
715         if (!images_->DeleteBackingImage(cow_image_name)) {
716             return false;
717         }
718     }
719 
720     std::string error;
721     auto file_path = GetSnapshotStatusFilePath(name);
722     if (!android::base::RemoveFileIfExists(file_path, &error)) {
723         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
724         return false;
725     }
726     return true;
727 }
728 
InitiateMerge()729 bool SnapshotManager::InitiateMerge() {
730     auto lock = LockExclusive();
731     if (!lock) return false;
732 
733     UpdateState state = ReadUpdateState(lock.get());
734     if (state != UpdateState::Unverified) {
735         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
736         return false;
737     }
738 
739     auto slot = GetCurrentSlot();
740     if (slot != Slot::Target) {
741         LOG(ERROR) << "Device cannot merge while not booting from new slot";
742         return false;
743     }
744 
745     std::vector<std::string> snapshots;
746     if (!ListSnapshots(lock.get(), &snapshots)) {
747         LOG(ERROR) << "Could not list snapshots";
748         return false;
749     }
750 
751     auto other_suffix = device_->GetOtherSlotSuffix();
752 
753     for (const auto& snapshot : snapshots) {
754         if (android::base::EndsWith(snapshot, other_suffix)) {
755             // Allow the merge to continue, but log this unexpected case.
756             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
757             continue;
758         }
759 
760         // The device has to be mapped, since everything should be merged at
761         // the same time. This is a fairly serious error. We could forcefully
762         // map everything here, but it should have been mapped during first-
763         // stage init.
764         if (dm_.GetState(snapshot) == DmDeviceState::INVALID) {
765             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
766             return false;
767         }
768     }
769 
770     auto metadata = ReadCurrentMetadata();
771     for (auto it = snapshots.begin(); it != snapshots.end();) {
772         switch (GetMetadataPartitionState(*metadata, *it)) {
773             case MetadataPartitionState::Flashed:
774                 LOG(WARNING) << "Detected re-flashing for partition " << *it
775                              << ". Skip merging it.";
776                 [[fallthrough]];
777             case MetadataPartitionState::None: {
778                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
779                 if (!DeleteSnapshot(lock.get(), *it)) {
780                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
781                                  << ". Skip merging it anyways.";
782                 }
783                 it = snapshots.erase(it);
784             } break;
785             case MetadataPartitionState::Updated: {
786                 ++it;
787             } break;
788         }
789     }
790 
791     bool compression_enabled = false;
792 
793     std::vector<std::string> first_merge_group;
794 
795     DmTargetSnapshot::Status initial_target_values = {};
796     for (const auto& snapshot : snapshots) {
797         if (!UpdateUsesUserSnapshots(lock.get())) {
798             DmTargetSnapshot::Status current_status;
799             if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
800                 return false;
801             }
802             initial_target_values.sectors_allocated += current_status.sectors_allocated;
803             initial_target_values.total_sectors += current_status.total_sectors;
804             initial_target_values.metadata_sectors += current_status.metadata_sectors;
805         }
806 
807         SnapshotStatus snapshot_status;
808         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
809             return false;
810         }
811 
812         compression_enabled |= snapshot_status.compression_enabled();
813         if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
814             first_merge_group.emplace_back(snapshot);
815         }
816     }
817 
818     SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
819     initial_status.set_state(UpdateState::Merging);
820     initial_status.set_compression_enabled(compression_enabled);
821 
822     if (!UpdateUsesUserSnapshots(lock.get())) {
823         initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
824         initial_status.set_total_sectors(initial_target_values.total_sectors);
825         initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
826     }
827 
828     // If any partitions shrunk, we need to merge them before we merge any other
829     // partitions (see b/177935716). Otherwise, a merge from another partition
830     // may overwrite the source block of a copy operation.
831     const std::vector<std::string>* merge_group;
832     if (first_merge_group.empty()) {
833         merge_group = &snapshots;
834         initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
835     } else {
836         merge_group = &first_merge_group;
837         initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
838     }
839 
840     // Point of no return - mark that we're starting a merge. From now on every
841     // eligible snapshot must be a merge target.
842     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
843         return false;
844     }
845 
846     auto reported_code = MergeFailureCode::Ok;
847     for (const auto& snapshot : *merge_group) {
848         // If this fails, we have no choice but to continue. Everything must
849         // be merged. This is not an ideal state to be in, but it is safe,
850         // because we the next boot will try again.
851         auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
852         if (code != MergeFailureCode::Ok) {
853             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
854             if (reported_code == MergeFailureCode::Ok) {
855                 reported_code = code;
856             }
857         }
858     }
859 
860     // If we couldn't switch everything to a merge target, pre-emptively mark
861     // this merge as failed. It will get acknowledged when WaitForMerge() is
862     // called.
863     if (reported_code != MergeFailureCode::Ok) {
864         WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
865     }
866 
867     // Return true no matter what, because a merge was initiated.
868     return true;
869 }
870 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)871 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
872     SnapshotStatus status;
873     if (!ReadSnapshotStatus(lock, name, &status)) {
874         return MergeFailureCode::ReadStatus;
875     }
876     if (status.state() != SnapshotState::CREATED) {
877         LOG(WARNING) << "Snapshot " << name
878                      << " has unexpected state: " << SnapshotState_Name(status.state());
879     }
880 
881     if (UpdateUsesUserSnapshots(lock)) {
882         if (EnsureSnapuserdConnected()) {
883             // This is the point where we inform the daemon to initiate/resume
884             // the merge
885             if (!snapuserd_client_->InitiateMerge(name)) {
886                 return MergeFailureCode::UnknownTable;
887             }
888         } else {
889             LOG(ERROR) << "Failed to connect to snapuserd daemon to initiate merge";
890             return MergeFailureCode::UnknownTable;
891         }
892     } else {
893         // After this, we return true because we technically did switch to a merge
894         // target. Everything else we do here is just informational.
895         if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
896             return code;
897         }
898     }
899 
900     status.set_state(SnapshotState::MERGING);
901 
902     if (!UpdateUsesUserSnapshots(lock)) {
903         DmTargetSnapshot::Status dm_status;
904         if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
905             LOG(ERROR) << "Could not query merge status for snapshot: " << name;
906         }
907         status.set_sectors_allocated(dm_status.sectors_allocated);
908         status.set_metadata_sectors(dm_status.metadata_sectors);
909     }
910 
911     if (!WriteSnapshotStatus(lock, status)) {
912         LOG(ERROR) << "Could not update status file for snapshot: " << name;
913     }
914     return MergeFailureCode::Ok;
915 }
916 
RewriteSnapshotDeviceTable(const std::string & name)917 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
918     std::vector<DeviceMapper::TargetInfo> old_targets;
919     if (!dm_.GetTableInfo(name, &old_targets)) {
920         LOG(ERROR) << "Could not read snapshot device table: " << name;
921         return MergeFailureCode::GetTableInfo;
922     }
923     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
924         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
925         return MergeFailureCode::UnknownTable;
926     }
927 
928     std::string base_device, cow_device;
929     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
930         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
931         return MergeFailureCode::GetTableParams;
932     }
933 
934     DmTable table;
935     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
936                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
937     if (!dm_.LoadTableAndActivate(name, table)) {
938         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
939         return MergeFailureCode::ActivateNewTable;
940     }
941     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
942     return MergeFailureCode::Ok;
943 }
944 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)945 bool SnapshotManager::GetSingleTarget(const std::string& dm_name, TableQuery query,
946                                       DeviceMapper::TargetInfo* target) {
947     if (dm_.GetState(dm_name) == DmDeviceState::INVALID) {
948         return false;
949     }
950 
951     std::vector<DeviceMapper::TargetInfo> targets;
952     bool result;
953     if (query == TableQuery::Status) {
954         result = dm_.GetTableStatus(dm_name, &targets);
955     } else {
956         result = dm_.GetTableInfo(dm_name, &targets);
957     }
958     if (!result) {
959         LOG(ERROR) << "Could not query device: " << dm_name;
960         return false;
961     }
962     if (targets.size() != 1) {
963         return false;
964     }
965 
966     *target = std::move(targets[0]);
967     return true;
968 }
969 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)970 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
971     DeviceMapper::TargetInfo snap_target;
972     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
973         return false;
974     }
975     auto type = DeviceMapper::GetTargetType(snap_target.spec);
976 
977     // If this is not a user-snapshot device then it should either
978     // be a dm-snapshot or dm-snapshot-merge target
979     if (type != "user") {
980         if (type != "snapshot" && type != "snapshot-merge") {
981             return false;
982         }
983     }
984 
985     if (target) {
986         *target = std::move(snap_target);
987     }
988     return true;
989 }
990 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)991 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
992                                           DmTargetSnapshot::Status* status) {
993     DeviceMapper::TargetInfo target;
994     if (!IsSnapshotDevice(dm_name, &target)) {
995         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
996         return false;
997     }
998     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
999         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
1000         return false;
1001     }
1002     if (target_type) {
1003         *target_type = DeviceMapper::GetTargetType(target.spec);
1004     }
1005     if (!status->error.empty()) {
1006         LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
1007         return false;
1008     }
1009     return true;
1010 }
1011 
1012 // Note that when a merge fails, we will *always* try again to complete the
1013 // merge each time the device boots. There is no harm in doing so, and if
1014 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)1015 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
1016                                                 const std::function<bool()>& before_cancel) {
1017     while (true) {
1018         auto result = CheckMergeState(before_cancel);
1019         LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
1020 
1021         if (result.state == UpdateState::MergeFailed) {
1022             AcknowledgeMergeFailure(result.failure_code);
1023         }
1024         if (result.state != UpdateState::Merging) {
1025             // Either there is no merge, or the merge was finished, so no need
1026             // to keep waiting.
1027             return result.state;
1028         }
1029 
1030         if (callback && !callback()) {
1031             return result.state;
1032         }
1033 
1034         // This wait is not super time sensitive, so we have a relatively
1035         // low polling frequency.
1036         std::this_thread::sleep_for(kUpdateStateCheckInterval);
1037     }
1038 }
1039 
CheckMergeState(const std::function<bool ()> & before_cancel)1040 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
1041     auto lock = LockExclusive();
1042     if (!lock) {
1043         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
1044     }
1045 
1046     auto result = CheckMergeState(lock.get(), before_cancel);
1047     LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
1048 
1049     if (result.state == UpdateState::MergeCompleted) {
1050         // Do this inside the same lock. Failures get acknowledged without the
1051         // lock, because flock() might have failed.
1052         AcknowledgeMergeSuccess(lock.get());
1053     } else if (result.state == UpdateState::Cancelled) {
1054         if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
1055             LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
1056         }
1057     }
1058     return result;
1059 }
1060 
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)1061 auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
1062         -> MergeResult {
1063     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1064     switch (update_status.state()) {
1065         case UpdateState::None:
1066         case UpdateState::MergeCompleted:
1067             // Harmless races are allowed between two callers of WaitForMerge,
1068             // so in both of these cases we just propagate the state.
1069             return MergeResult(update_status.state());
1070 
1071         case UpdateState::Merging:
1072         case UpdateState::MergeNeedsReboot:
1073         case UpdateState::MergeFailed:
1074             // We'll poll each snapshot below. Note that for the NeedsReboot
1075             // case, we always poll once to give cleanup another opportunity to
1076             // run.
1077             break;
1078 
1079         case UpdateState::Unverified:
1080             // This is an edge case. Normally cancelled updates are detected
1081             // via the merge poll below, but if we never started a merge, we
1082             // need to also check here.
1083             if (HandleCancelledUpdate(lock, before_cancel)) {
1084                 return MergeResult(UpdateState::Cancelled);
1085             }
1086             return MergeResult(update_status.state());
1087 
1088         default:
1089             return MergeResult(update_status.state());
1090     }
1091 
1092     std::vector<std::string> snapshots;
1093     if (!ListSnapshots(lock, &snapshots)) {
1094         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
1095     }
1096 
1097     auto other_suffix = device_->GetOtherSlotSuffix();
1098 
1099     bool cancelled = false;
1100     bool merging = false;
1101     bool needs_reboot = false;
1102     bool wrong_phase = false;
1103     MergeFailureCode failure_code = MergeFailureCode::Ok;
1104     for (const auto& snapshot : snapshots) {
1105         if (android::base::EndsWith(snapshot, other_suffix)) {
1106             // This will have triggered an error message in InitiateMerge already.
1107             LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
1108             continue;
1109         }
1110 
1111         auto result = CheckTargetMergeState(lock, snapshot, update_status);
1112         LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
1113 
1114         switch (result.state) {
1115             case UpdateState::MergeFailed:
1116                 // Take the first failure code in case other failures compound.
1117                 if (failure_code == MergeFailureCode::Ok) {
1118                     failure_code = result.failure_code;
1119                 }
1120                 break;
1121             case UpdateState::Merging:
1122                 merging = true;
1123                 break;
1124             case UpdateState::MergeNeedsReboot:
1125                 needs_reboot = true;
1126                 break;
1127             case UpdateState::MergeCompleted:
1128                 break;
1129             case UpdateState::Cancelled:
1130                 cancelled = true;
1131                 break;
1132             case UpdateState::None:
1133                 wrong_phase = true;
1134                 break;
1135             default:
1136                 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1137                            << "\"" << result.state << "\"";
1138                 if (failure_code == MergeFailureCode::Ok) {
1139                     failure_code = MergeFailureCode::UnexpectedMergeState;
1140                 }
1141                 break;
1142         }
1143     }
1144 
1145     if (merging) {
1146         // Note that we handle "Merging" before we handle anything else. We
1147         // want to poll until *nothing* is merging if we can, so everything has
1148         // a chance to get marked as completed or failed.
1149         return MergeResult(UpdateState::Merging);
1150     }
1151     if (failure_code != MergeFailureCode::Ok) {
1152         // Note: since there are many drop-out cases for failure, we acknowledge
1153         // it in WaitForMerge rather than here and elsewhere.
1154         return MergeResult(UpdateState::MergeFailed, failure_code);
1155     }
1156     if (wrong_phase) {
1157         // If we got here, no other partitions are being merged, and nothing
1158         // failed to merge. It's safe to move to the next merge phase.
1159         auto code = MergeSecondPhaseSnapshots(lock);
1160         if (code != MergeFailureCode::Ok) {
1161             return MergeResult(UpdateState::MergeFailed, code);
1162         }
1163         return MergeResult(UpdateState::Merging);
1164     }
1165     if (needs_reboot) {
1166         WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1167         return MergeResult(UpdateState::MergeNeedsReboot);
1168     }
1169     if (cancelled) {
1170         // This is an edge case, that we handle as correctly as we sensibly can.
1171         // The underlying partition has changed behind update_engine, and we've
1172         // removed the snapshot as a result. The exact state of the update is
1173         // undefined now, but this can only happen on an unlocked device where
1174         // partitions can be flashed without wiping userdata.
1175         return MergeResult(UpdateState::Cancelled);
1176     }
1177     return MergeResult(UpdateState::MergeCompleted);
1178 }
1179 
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1180 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1181                                             const SnapshotUpdateStatus& update_status)
1182         -> MergeResult {
1183     SnapshotStatus snapshot_status;
1184     if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1185         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1186     }
1187 
1188     std::unique_ptr<LpMetadata> current_metadata;
1189 
1190     if (!IsSnapshotDevice(name)) {
1191         if (!current_metadata) {
1192             current_metadata = ReadCurrentMetadata();
1193         }
1194 
1195         if (!current_metadata ||
1196             GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1197             DeleteSnapshot(lock, name);
1198             return MergeResult(UpdateState::Cancelled);
1199         }
1200 
1201         // During a check, we decided the merge was complete, but we were unable to
1202         // collapse the device-mapper stack and perform COW cleanup. If we haven't
1203         // rebooted after this check, the device will still be a snapshot-merge
1204         // target. If we have rebooted, the device will now be a linear target,
1205         // and we can try cleanup again.
1206         if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1207             // NB: It's okay if this fails now, we gave cleanup our best effort.
1208             OnSnapshotMergeComplete(lock, name, snapshot_status);
1209             return MergeResult(UpdateState::MergeCompleted);
1210         }
1211 
1212         LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1213         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1214     }
1215 
1216     // This check is expensive so it is only enabled for debugging.
1217     DCHECK((current_metadata = ReadCurrentMetadata()) &&
1218            GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1219 
1220     if (UpdateUsesUserSnapshots(lock)) {
1221         std::string merge_status;
1222         if (EnsureSnapuserdConnected()) {
1223             // Query the snapshot status from the daemon
1224             merge_status = snapuserd_client_->QuerySnapshotStatus(name);
1225         } else {
1226             MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1227         }
1228 
1229         if (merge_status == "snapshot-merge-failed") {
1230             return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1231         }
1232 
1233         // This is the case when device reboots during merge. Once the device boots,
1234         // snapuserd daemon will not resume merge immediately in first stage init.
1235         // This is slightly different as compared to dm-snapshot-merge; In this
1236         // case, metadata file will have "MERGING" state whereas the daemon will be
1237         // waiting to resume the merge. Thus, we resume the merge at this point.
1238         if (merge_status == "snapshot" && snapshot_status.state() == SnapshotState::MERGING) {
1239             if (!snapuserd_client_->InitiateMerge(name)) {
1240                 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1241             }
1242             return MergeResult(UpdateState::Merging);
1243         }
1244 
1245         if (merge_status == "snapshot" &&
1246             DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1247             update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1248             // The snapshot is not being merged because it's in the wrong phase.
1249             return MergeResult(UpdateState::None);
1250         }
1251 
1252         if (merge_status == "snapshot-merge") {
1253             if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1254                 LOG(ERROR) << "Snapshot " << name
1255                            << " is merging after being marked merge-complete.";
1256                 return MergeResult(UpdateState::MergeFailed,
1257                                    MergeFailureCode::UnmergedSectorsAfterCompletion);
1258             }
1259             return MergeResult(UpdateState::Merging);
1260         }
1261 
1262         if (merge_status != "snapshot-merge-complete") {
1263             LOG(ERROR) << "Snapshot " << name << " has incorrect status: " << merge_status;
1264             return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1265         }
1266     } else {
1267         // dm-snapshot in the kernel
1268         std::string target_type;
1269         DmTargetSnapshot::Status status;
1270         if (!QuerySnapshotStatus(name, &target_type, &status)) {
1271             return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1272         }
1273         if (target_type == "snapshot" &&
1274             DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1275             update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1276             // The snapshot is not being merged because it's in the wrong phase.
1277             return MergeResult(UpdateState::None);
1278         }
1279         if (target_type != "snapshot-merge") {
1280             // We can get here if we failed to rewrite the target type in
1281             // InitiateMerge(). If we failed to create the target in first-stage
1282             // init, boot would not succeed.
1283             LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1284             return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1285         }
1286 
1287         // These two values are equal when merging is complete.
1288         if (status.sectors_allocated != status.metadata_sectors) {
1289             if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1290                 LOG(ERROR) << "Snapshot " << name
1291                            << " is merging after being marked merge-complete.";
1292                 return MergeResult(UpdateState::MergeFailed,
1293                                    MergeFailureCode::UnmergedSectorsAfterCompletion);
1294             }
1295             return MergeResult(UpdateState::Merging);
1296         }
1297     }
1298 
1299     // Merge is complete at this point
1300 
1301     auto code = CheckMergeConsistency(lock, name, snapshot_status);
1302     if (code != MergeFailureCode::Ok) {
1303         return MergeResult(UpdateState::MergeFailed, code);
1304     }
1305 
1306     // Merging is done. First, update the status file to indicate the merge
1307     // is complete. We do this before calling OnSnapshotMergeComplete, even
1308     // though this means the write is potentially wasted work (since in the
1309     // ideal case we'll immediately delete the file).
1310     //
1311     // This makes it simpler to reason about the next reboot: no matter what
1312     // part of cleanup failed, first-stage init won't try to create another
1313     // snapshot device for this partition.
1314     snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1315     if (!WriteSnapshotStatus(lock, snapshot_status)) {
1316         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1317     }
1318     if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1319         return MergeResult(UpdateState::MergeNeedsReboot);
1320     }
1321     return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1322 }
1323 
1324 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1325 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1326                                           const SnapshotStatus& status) {
1327     // If no partition was created (the COW exists entirely on /data), the
1328     // device-mapper layering is different than if we had a partition.
1329     if (status.cow_partition_size() == 0) {
1330         return GetCowImageDeviceName(snapshot);
1331     }
1332     return GetCowName(snapshot);
1333 }
1334 
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1335 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1336                                                         const SnapshotStatus& status) {
1337     CHECK(lock);
1338 
1339     return merge_consistency_checker_(name, status);
1340 }
1341 
CheckMergeConsistency(const std::string & name,const SnapshotStatus & status)1342 MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) {
1343     if (!status.compression_enabled()) {
1344         // Do not try to verify old-style COWs yet.
1345         return MergeFailureCode::Ok;
1346     }
1347 
1348     auto& dm = DeviceMapper::Instance();
1349 
1350     std::string cow_image_name = GetMappedCowDeviceName(name, status);
1351     std::string cow_image_path;
1352     if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1353         LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1354         return MergeFailureCode::GetCowPathConsistencyCheck;
1355     }
1356 
1357     // First pass, count # of ops.
1358     size_t num_ops = 0;
1359     {
1360         unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1361         if (fd < 0) {
1362             PLOG(ERROR) << "Failed to open " << cow_image_name;
1363             return MergeFailureCode::OpenCowConsistencyCheck;
1364         }
1365 
1366         CowReader reader;
1367         if (!reader.Parse(std::move(fd))) {
1368             LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1369             return MergeFailureCode::ParseCowConsistencyCheck;
1370         }
1371 
1372         num_ops = reader.get_num_total_data_ops();
1373     }
1374 
1375     // Second pass, try as hard as we can to get the actual number of blocks
1376     // the system thinks is merged.
1377     unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1378     if (fd < 0) {
1379         PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1380         return MergeFailureCode::OpenCowDirectConsistencyCheck;
1381     }
1382 
1383     void* addr;
1384     size_t page_size = getpagesize();
1385     if (posix_memalign(&addr, page_size, page_size) < 0) {
1386         PLOG(ERROR) << "posix_memalign with page size " << page_size;
1387         return MergeFailureCode::MemAlignConsistencyCheck;
1388     }
1389 
1390     // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1391     std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1392     if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1393         PLOG(ERROR) << "Direct read failed " << cow_image_name;
1394         return MergeFailureCode::DirectReadConsistencyCheck;
1395     }
1396 
1397     auto header = reinterpret_cast<CowHeader*>(buffer.get());
1398     if (header->num_merge_ops != num_ops) {
1399         LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1400                    << "but " << header->num_merge_ops << " were actually recorded.";
1401         LOG(ERROR) << "Aborting merge progress for snapshot " << name
1402                    << ", will try again next boot";
1403         return MergeFailureCode::WrongMergeCountConsistencyCheck;
1404     }
1405 
1406     return MergeFailureCode::Ok;
1407 }
1408 
MergeSecondPhaseSnapshots(LockedFile * lock)1409 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1410     std::vector<std::string> snapshots;
1411     if (!ListSnapshots(lock, &snapshots)) {
1412         return MergeFailureCode::ListSnapshots;
1413     }
1414 
1415     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1416     CHECK(update_status.state() == UpdateState::Merging ||
1417           update_status.state() == UpdateState::MergeFailed);
1418     CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1419 
1420     update_status.set_state(UpdateState::Merging);
1421     update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1422     if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1423         return MergeFailureCode::WriteStatus;
1424     }
1425 
1426     MergeFailureCode result = MergeFailureCode::Ok;
1427     for (const auto& snapshot : snapshots) {
1428         SnapshotStatus snapshot_status;
1429         if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1430             return MergeFailureCode::ReadStatus;
1431         }
1432         if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1433             continue;
1434         }
1435         auto code = SwitchSnapshotToMerge(lock, snapshot);
1436         if (code != MergeFailureCode::Ok) {
1437             LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1438             if (result == MergeFailureCode::Ok) {
1439                 result = code;
1440             }
1441         }
1442     }
1443     return result;
1444 }
1445 
GetSnapshotBootIndicatorPath()1446 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1447     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1448 }
1449 
GetRollbackIndicatorPath()1450 std::string SnapshotManager::GetRollbackIndicatorPath() {
1451     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1452 }
1453 
GetForwardMergeIndicatorPath()1454 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1455     return metadata_dir_ + "/allow-forward-merge";
1456 }
1457 
GetOldPartitionMetadataPath()1458 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1459     return metadata_dir_ + "/old-partition-metadata";
1460 }
1461 
AcknowledgeMergeSuccess(LockedFile * lock)1462 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1463     // It's not possible to remove update state in recovery, so write an
1464     // indicator that cleanup is needed on reboot. If a factory data reset
1465     // was requested, it doesn't matter, everything will get wiped anyway.
1466     // To make testing easier we consider a /data wipe as cleaned up.
1467     if (device_->IsRecovery()) {
1468         WriteUpdateState(lock, UpdateState::MergeCompleted);
1469         return;
1470     }
1471 
1472     RemoveAllUpdateState(lock);
1473 
1474     if (UpdateUsesUserSnapshots(lock) && !device()->IsTestDevice()) {
1475         if (snapuserd_client_) {
1476             snapuserd_client_->DetachSnapuserd();
1477             snapuserd_client_->CloseConnection();
1478             snapuserd_client_ = nullptr;
1479         }
1480     }
1481 }
1482 
AcknowledgeMergeFailure(MergeFailureCode failure_code)1483 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1484     // Log first, so worst case, we always have a record of why the calls below
1485     // were being made.
1486     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1487 
1488     auto lock = LockExclusive();
1489     if (!lock) return;
1490 
1491     // Since we released the lock in between WaitForMerge and here, it's
1492     // possible (1) the merge successfully completed or (2) was already
1493     // marked as a failure. So make sure to check the state again, and
1494     // only mark as a failure if appropriate.
1495     UpdateState state = ReadUpdateState(lock.get());
1496     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1497         return;
1498     }
1499 
1500     WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1501 }
1502 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1503 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1504                                               const SnapshotStatus& status) {
1505     if (!UpdateUsesUserSnapshots(lock)) {
1506         if (IsSnapshotDevice(name)) {
1507             // We are extra-cautious here, to avoid deleting the wrong table.
1508             std::string target_type;
1509             DmTargetSnapshot::Status dm_status;
1510             if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1511                 return false;
1512             }
1513             if (target_type != "snapshot-merge") {
1514                 LOG(ERROR) << "Unexpected target type " << target_type
1515                            << " for snapshot device: " << name;
1516                 return false;
1517             }
1518             if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1519                 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1520                 return false;
1521             }
1522             if (!CollapseSnapshotDevice(lock, name, status)) {
1523                 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1524                 return false;
1525             }
1526         }
1527     } else {
1528         // Just collapse the device - no need to query again as we just did
1529         // prior to calling this function
1530         if (!CollapseSnapshotDevice(lock, name, status)) {
1531             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1532             return false;
1533         }
1534     }
1535 
1536     // Note that collapsing is implicitly an Unmap, so we don't need to
1537     // unmap the snapshot.
1538 
1539     if (!DeleteSnapshot(lock, name)) {
1540         LOG(ERROR) << "Could not delete snapshot: " << name;
1541         return false;
1542     }
1543     return true;
1544 }
1545 
CollapseSnapshotDevice(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1546 bool SnapshotManager::CollapseSnapshotDevice(LockedFile* lock, const std::string& name,
1547                                              const SnapshotStatus& status) {
1548     if (!UpdateUsesUserSnapshots(lock)) {
1549         // Verify we have a snapshot-merge device.
1550         DeviceMapper::TargetInfo target;
1551         if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1552             return false;
1553         }
1554         if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1555             // This should be impossible, it was checked earlier.
1556             LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1557             return false;
1558         }
1559 
1560         std::string base_device, cow_device;
1561         if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1562             LOG(ERROR) << "Could not parse snapshot device " << name
1563                        << " parameters: " << target.data;
1564             return false;
1565         }
1566     }
1567 
1568     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1569     if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1570         LOG(ERROR) << "Snapshot " << name
1571                    << " size is not sector aligned: " << status.snapshot_size();
1572         return false;
1573     }
1574 
1575     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1576     // Create a DmTable that is identical to the base device.
1577     CreateLogicalPartitionParams base_device_params{
1578             .block_device = device_->GetSuperDevice(slot),
1579             .metadata_slot = slot,
1580             .partition_name = name,
1581             .partition_opener = &device_->GetPartitionOpener(),
1582     };
1583     DmTable table;
1584     if (!CreateDmTable(base_device_params, &table)) {
1585         LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1586         return false;
1587     }
1588 
1589     if (!dm_.LoadTableAndActivate(name, table)) {
1590         return false;
1591     }
1592 
1593     if (!UpdateUsesUserSnapshots(lock)) {
1594         // Attempt to delete the snapshot device if one still exists. Nothing
1595         // should be depending on the device, and device-mapper should have
1596         // flushed remaining I/O. We could in theory replace with dm-zero (or
1597         // re-use the table above), but for now it's better to know why this
1598         // would fail.
1599         //
1600         // Furthermore, we should not be trying to unmap for userspace snapshot
1601         // as unmap will fail since dm-user itself was a snapshot device prior
1602         // to switching of tables. Unmap will fail as the device will be mounted
1603         // by system partitions
1604         if (status.compression_enabled()) {
1605             auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
1606             UnmapDmUserDevice(dm_user_name);
1607         }
1608     }
1609 
1610     // We can't delete base device immediately as daemon holds a reference.
1611     // Make sure we wait for all the worker threads to terminate and release
1612     // the reference
1613     if (UpdateUsesUserSnapshots(lock) && EnsureSnapuserdConnected()) {
1614         if (!snapuserd_client_->WaitForDeviceDelete(name)) {
1615             LOG(ERROR) << "Failed to wait for " << name << " control device to delete";
1616         }
1617     }
1618 
1619     auto base_name = GetBaseDeviceName(name);
1620     if (!DeleteDeviceIfExists(base_name)) {
1621         LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1622     }
1623 
1624     if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1625         LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1626     }
1627 
1628     return true;
1629 }
1630 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1631 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1632                                             const std::function<bool()>& before_cancel) {
1633     auto slot = GetCurrentSlot();
1634     if (slot == Slot::Unknown) {
1635         return false;
1636     }
1637 
1638     // If all snapshots were reflashed, then cancel the entire update.
1639     if (AreAllSnapshotsCancelled(lock)) {
1640         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1641         return RemoveAllUpdateState(lock, before_cancel);
1642     }
1643 
1644     // If update has been rolled back, then cancel the entire update.
1645     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1646     // when ProcessUpdateState() returns UpdateState::Cancelled.
1647     auto current_slot = GetCurrentSlot();
1648     if (current_slot != Slot::Source) {
1649         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1650                   << " slot, taking no action.";
1651         return false;
1652     }
1653 
1654     // current_slot == Source. Attempt to detect rollbacks.
1655     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1656         // This unverified update is not attempted. Take no action.
1657         PLOG(INFO) << "Rollback indicator not detected. "
1658                    << "Update state is being processed before reboot, taking no action.";
1659         return false;
1660     }
1661 
1662     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1663     return RemoveAllUpdateState(lock, before_cancel);
1664 }
1665 
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1666 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1667                                             std::vector<std::string>* snapuserd_argv) {
1668     LOG(INFO) << "Performing transition for snapuserd.";
1669 
1670     // Don't use EnsureSnapuserdConnected() because this is called from init,
1671     // and attempting to do so will deadlock.
1672     if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1673         snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1674         if (!snapuserd_client_) {
1675             LOG(ERROR) << "Unable to connect to snapuserd";
1676             return false;
1677         }
1678     }
1679 
1680     auto lock = LockExclusive();
1681     if (!lock) return false;
1682 
1683     std::vector<std::string> snapshots;
1684     if (!ListSnapshots(lock.get(), &snapshots)) {
1685         LOG(ERROR) << "Failed to list snapshots.";
1686         return false;
1687     }
1688 
1689     if (UpdateUsesUserSnapshots(lock.get()) && transition == InitTransition::SELINUX_DETACH) {
1690         snapuserd_argv->emplace_back("-user_snapshot");
1691         if (UpdateUsesIouring(lock.get())) {
1692             snapuserd_argv->emplace_back("-io_uring");
1693         }
1694     }
1695 
1696     size_t num_cows = 0;
1697     size_t ok_cows = 0;
1698     for (const auto& snapshot : snapshots) {
1699         std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
1700 
1701         if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
1702             continue;
1703         }
1704 
1705         DeviceMapper::TargetInfo target;
1706         if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1707             continue;
1708         }
1709 
1710         auto target_type = DeviceMapper::GetTargetType(target.spec);
1711         if (target_type != "user") {
1712             LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1713             continue;
1714         }
1715 
1716         num_cows++;
1717 
1718         SnapshotStatus snapshot_status;
1719         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1720             LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1721             continue;
1722         }
1723 
1724         auto misc_name = user_cow_name;
1725 
1726         DmTable table;
1727         table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1728         if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
1729             LOG(ERROR) << "Unable to swap tables for " << misc_name;
1730             continue;
1731         }
1732 
1733         std::string source_device_name;
1734         if (snapshot_status.old_partition_size() > 0) {
1735             source_device_name = GetSourceDeviceName(snapshot);
1736         } else {
1737             source_device_name = GetBaseDeviceName(snapshot);
1738         }
1739 
1740         std::string source_device;
1741         if (!dm_.GetDmDevicePathByName(source_device_name, &source_device)) {
1742             LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1743             continue;
1744         }
1745 
1746         std::string base_path_merge;
1747         if (!dm_.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &base_path_merge)) {
1748             LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1749             continue;
1750         }
1751 
1752         std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1753 
1754         std::string cow_image_device;
1755         if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1756             LOG(ERROR) << "Could not get device path for " << cow_image_name;
1757             continue;
1758         }
1759 
1760         // Wait for ueventd to acknowledge and create the control device node.
1761         std::string control_device = "/dev/dm-user/" + misc_name;
1762         if (!WaitForDevice(control_device, 10s)) {
1763             LOG(ERROR) << "dm-user control device no found:  " << misc_name;
1764             continue;
1765         }
1766 
1767         if (transition == InitTransition::SELINUX_DETACH) {
1768             if (!UpdateUsesUserSnapshots(lock.get())) {
1769                 auto message = misc_name + "," + cow_image_device + "," + source_device;
1770                 snapuserd_argv->emplace_back(std::move(message));
1771             } else {
1772                 auto message = misc_name + "," + cow_image_device + "," + source_device + "," +
1773                                base_path_merge;
1774                 snapuserd_argv->emplace_back(std::move(message));
1775             }
1776 
1777             // Do not attempt to connect to the new snapuserd yet, it hasn't
1778             // been started. We do however want to wait for the misc device
1779             // to have been created.
1780             ok_cows++;
1781             continue;
1782         }
1783 
1784         uint64_t base_sectors;
1785         if (!UpdateUsesUserSnapshots(lock.get())) {
1786             base_sectors =
1787                     snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1788         } else {
1789             base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_image_device,
1790                                                             source_device, base_path_merge);
1791         }
1792 
1793         if (base_sectors == 0) {
1794             // Unrecoverable as metadata reads from cow device failed
1795             LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1796             return false;
1797         }
1798 
1799         CHECK(base_sectors <= target.spec.length);
1800 
1801         if (!snapuserd_client_->AttachDmUser(misc_name)) {
1802             // This error is unrecoverable. We cannot proceed because reads to
1803             // the underlying device will fail.
1804             LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1805             return false;
1806         }
1807 
1808         ok_cows++;
1809     }
1810 
1811     if (ok_cows != num_cows) {
1812         LOG(ERROR) << "Could not transition all snapuserd consumers.";
1813         return false;
1814     }
1815     return true;
1816 }
1817 
ReadCurrentMetadata()1818 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1819     const auto& opener = device_->GetPartitionOpener();
1820     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1821     auto super_device = device_->GetSuperDevice(slot);
1822     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1823     if (!metadata) {
1824         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1825         return nullptr;
1826     }
1827     return metadata;
1828 }
1829 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1830 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1831         const LpMetadata& metadata, const std::string& name) {
1832     auto partition = android::fs_mgr::FindPartition(metadata, name);
1833     if (!partition) return MetadataPartitionState::None;
1834     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1835         return MetadataPartitionState::Updated;
1836     }
1837     return MetadataPartitionState::Flashed;
1838 }
1839 
AreAllSnapshotsCancelled(LockedFile * lock)1840 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1841     std::vector<std::string> snapshots;
1842     if (!ListSnapshots(lock, &snapshots)) {
1843         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1844                      << "after applying an update. Assuming no snapshots.";
1845         // Let HandleCancelledUpdate resets UpdateState.
1846         return true;
1847     }
1848 
1849     std::map<std::string, bool> flashing_status;
1850 
1851     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1852         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1853                      << "removing update states.";
1854         return false;
1855     }
1856 
1857     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1858                                                [](const auto& pair) { return pair.second; });
1859 
1860     if (all_snapshots_cancelled) {
1861         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1862     }
1863     return all_snapshots_cancelled;
1864 }
1865 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1866 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1867                                                 const std::vector<std::string>& snapshots,
1868                                                 std::map<std::string, bool>* out) {
1869     CHECK(lock);
1870 
1871     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1872     if (source_slot_suffix.empty()) {
1873         return false;
1874     }
1875     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1876     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1877 
1878     // Attempt to detect re-flashing on each partition.
1879     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1880     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1881     //   deleted. Caller is responsible for merging the rest of the snapshots.
1882     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1883     //
1884     // Note that we use target slot metadata, since if an OTA has been applied
1885     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1886     // operation against dynamic partitions ensures that all copies of the
1887     // metadata are in sync, so flashing all partitions on the source slot will
1888     // remove the UPDATED flag on the target slot as well.
1889     const auto& opener = device_->GetPartitionOpener();
1890     auto super_device = device_->GetSuperDevice(target_slot);
1891     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1892     if (!metadata) {
1893         return false;
1894     }
1895 
1896     for (const auto& snapshot_name : snapshots) {
1897         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1898             MetadataPartitionState::Updated) {
1899             out->emplace(snapshot_name, false);
1900         } else {
1901             // Delete snapshots for partitions that are re-flashed after the update.
1902             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1903             out->emplace(snapshot_name, true);
1904         }
1905     }
1906     return true;
1907 }
1908 
RemoveInvalidSnapshots(LockedFile * lock)1909 void SnapshotManager::RemoveInvalidSnapshots(LockedFile* lock) {
1910     std::vector<std::string> snapshots;
1911 
1912     // Remove the stale snapshot metadata
1913     //
1914     // We make sure that all the three cases
1915     // are valid before removing the snapshot metadata:
1916     //
1917     // 1: dm state is active
1918     // 2: Root fs is not mounted off as a snapshot device
1919     // 3: Snapshot slot suffix should match current device slot
1920     if (!ListSnapshots(lock, &snapshots, device_->GetSlotSuffix()) || snapshots.empty()) {
1921         return;
1922     }
1923 
1924     // We indeed have some invalid snapshots
1925     for (const auto& name : snapshots) {
1926         if (dm_.GetState(name) == DmDeviceState::ACTIVE && !IsSnapshotDevice(name)) {
1927             if (!DeleteSnapshot(lock, name)) {
1928                 LOG(ERROR) << "Failed to delete invalid snapshot: " << name;
1929             } else {
1930                 LOG(INFO) << "Invalid snapshot: " << name << " deleted";
1931             }
1932         }
1933     }
1934 }
1935 
RemoveAllSnapshots(LockedFile * lock)1936 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1937     std::vector<std::string> snapshots;
1938     if (!ListSnapshots(lock, &snapshots)) {
1939         LOG(ERROR) << "Could not list snapshots";
1940         return false;
1941     }
1942 
1943     std::map<std::string, bool> flashing_status;
1944     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1945         LOG(WARNING) << "Failed to get flashing status";
1946     }
1947 
1948     auto current_slot = GetCurrentSlot();
1949     bool ok = true;
1950     bool has_mapped_cow_images = false;
1951     for (const auto& name : snapshots) {
1952         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1953         // If boot indicator is missing, update state is None or Initiated, so
1954         //   it is also okay to unmap and delete all the snapshots.
1955         // If booting off target slot,
1956         //  - should not unmap because:
1957         //    - In Android mode, snapshots are not mapped, but
1958         //      filesystems are mounting off dm-linear targets directly.
1959         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1960         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1961         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1962         //    as dm-snapshot (for example, after merge completes).
1963         bool should_unmap = current_slot != Slot::Target;
1964         bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1965         if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1966             // Something very unexpected has happened - we want to unmap this
1967             // snapshot, but it's on the wrong slot. We can't unmap an active
1968             // partition. If this is not really a snapshot, skip the unmap
1969             // step.
1970             if (dm_.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1971                 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1972                            << " for source partition; removing without unmap.";
1973                 should_unmap = false;
1974             }
1975         }
1976 
1977         bool partition_ok = true;
1978         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1979             partition_ok = false;
1980         }
1981         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1982             partition_ok = false;
1983         }
1984 
1985         if (!partition_ok) {
1986             // Remember whether or not we were able to unmap the cow image.
1987             auto cow_image_device = GetCowImageDeviceName(name);
1988             has_mapped_cow_images |=
1989                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1990 
1991             ok = false;
1992         }
1993     }
1994 
1995     if (ok || !has_mapped_cow_images) {
1996         // Delete any image artifacts as a precaution, in case an update is
1997         // being cancelled due to some corrupted state in an lp_metadata file.
1998         // Note that we do not do this if some cow images are still mapped,
1999         // since we must not remove backing storage if it's in use.
2000         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
2001             LOG(ERROR) << "Could not remove all snapshot artifacts";
2002             return false;
2003         }
2004     }
2005     return ok;
2006 }
2007 
2008 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)2009 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
2010                                            Slot current_slot, const std::string& name) {
2011     if (current_slot != Slot::Target) {
2012         return true;
2013     }
2014     auto it = flashing_status.find(name);
2015     if (it == flashing_status.end()) {
2016         LOG(WARNING) << "Can't determine flashing status for " << name;
2017         return true;
2018     }
2019     if (it->second) {
2020         // partition flashed, okay to delete obsolete snapshots
2021         return true;
2022     }
2023     return !IsSnapshotDevice(name);
2024 }
2025 
GetUpdateState(double * progress)2026 UpdateState SnapshotManager::GetUpdateState(double* progress) {
2027     // If we've never started an update, the state file won't exist.
2028     auto state_file = GetStateFilePath();
2029     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2030         return UpdateState::None;
2031     }
2032 
2033     auto lock = LockShared();
2034     if (!lock) {
2035         return UpdateState::None;
2036     }
2037 
2038     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
2039     auto state = update_status.state();
2040     if (progress == nullptr) {
2041         return state;
2042     }
2043 
2044     if (state == UpdateState::MergeCompleted) {
2045         *progress = 100.0;
2046         return state;
2047     }
2048 
2049     *progress = 0.0;
2050     if (state != UpdateState::Merging) {
2051         return state;
2052     }
2053 
2054     if (!UpdateUsesUserSnapshots(lock.get())) {
2055         // Sum all the snapshot states as if the system consists of a single huge
2056         // snapshots device, then compute the merge completion percentage of that
2057         // device.
2058         std::vector<std::string> snapshots;
2059         if (!ListSnapshots(lock.get(), &snapshots)) {
2060             LOG(ERROR) << "Could not list snapshots";
2061             return state;
2062         }
2063 
2064         DmTargetSnapshot::Status fake_snapshots_status = {};
2065         for (const auto& snapshot : snapshots) {
2066             DmTargetSnapshot::Status current_status;
2067 
2068             if (!IsSnapshotDevice(snapshot)) continue;
2069             if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
2070 
2071             fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
2072             fake_snapshots_status.total_sectors += current_status.total_sectors;
2073             fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
2074         }
2075 
2076         *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
2077                                                    update_status.sectors_allocated());
2078     } else {
2079         if (EnsureSnapuserdConnected()) {
2080             *progress = snapuserd_client_->GetMergePercent();
2081         }
2082     }
2083 
2084     return state;
2085 }
2086 
UpdateUsesCompression()2087 bool SnapshotManager::UpdateUsesCompression() {
2088     auto lock = LockShared();
2089     if (!lock) return false;
2090     return UpdateUsesCompression(lock.get());
2091 }
2092 
UpdateUsesCompression(LockedFile * lock)2093 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
2094     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2095     return update_status.compression_enabled();
2096 }
2097 
UpdateUsesIouring(LockedFile * lock)2098 bool SnapshotManager::UpdateUsesIouring(LockedFile* lock) {
2099     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2100     return update_status.io_uring_enabled();
2101 }
2102 
UpdateUsesUserSnapshots()2103 bool SnapshotManager::UpdateUsesUserSnapshots() {
2104     // This and the following function is constantly
2105     // invoked during snapshot merge. We want to avoid
2106     // constantly reading from disk. Hence, store this
2107     // value in memory.
2108     //
2109     // Furthermore, this value in the disk is set
2110     // only when OTA is applied and doesn't change
2111     // during merge phase. Hence, once we know that
2112     // the value is read from disk the very first time,
2113     // it is safe to read successive checks from memory.
2114     if (is_snapshot_userspace_.has_value()) {
2115         return is_snapshot_userspace_.value();
2116     }
2117 
2118     auto lock = LockShared();
2119     if (!lock) return false;
2120 
2121     return UpdateUsesUserSnapshots(lock.get());
2122 }
2123 
UpdateUsesUserSnapshots(LockedFile * lock)2124 bool SnapshotManager::UpdateUsesUserSnapshots(LockedFile* lock) {
2125     // See UpdateUsesUserSnapshots()
2126     if (is_snapshot_userspace_.has_value()) {
2127         return is_snapshot_userspace_.value();
2128     }
2129 
2130     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2131     is_snapshot_userspace_ = update_status.userspace_snapshots();
2132     return is_snapshot_userspace_.value();
2133 }
2134 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)2135 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
2136                                     const std::string& suffix) {
2137     CHECK(lock);
2138 
2139     auto dir_path = metadata_dir_ + "/snapshots"s;
2140     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
2141     if (!dir) {
2142         PLOG(ERROR) << "opendir failed: " << dir_path;
2143         return false;
2144     }
2145 
2146     struct dirent* dp;
2147     while ((dp = readdir(dir.get())) != nullptr) {
2148         if (dp->d_type != DT_REG) continue;
2149 
2150         std::string name(dp->d_name);
2151         if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
2152             continue;
2153         }
2154 
2155         // Insert system and product partition at the beginning so that
2156         // during snapshot-merge, these partitions are merged first.
2157         if (name == "system_a" || name == "system_b" || name == "product_a" ||
2158             name == "product_b") {
2159             snapshots->insert(snapshots->begin(), std::move(name));
2160         } else {
2161             snapshots->emplace_back(std::move(name));
2162         }
2163     }
2164 
2165     return true;
2166 }
2167 
IsSnapshotManagerNeeded()2168 bool SnapshotManager::IsSnapshotManagerNeeded() {
2169     return access(kBootIndicatorPath, F_OK) == 0;
2170 }
2171 
GetGlobalRollbackIndicatorPath()2172 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
2173     return kRollbackIndicatorPath;
2174 }
2175 
NeedSnapshotsInFirstStageMount()2176 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
2177     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
2178     // will create devices that look like the old slot, except with extra
2179     // content at the end of each device. This will confuse dm-verity, and
2180     // ultimately we'll fail to boot. Why not make it a fatal error and have
2181     // the reason be clearer? Because the indicator file still exists, and
2182     // if this was FATAL, reverting to the old slot would be broken.
2183     auto slot = GetCurrentSlot();
2184 
2185     if (slot != Slot::Target) {
2186         if (slot == Slot::Source) {
2187             // Device is rebooting into the original slot, so mark this as a
2188             // rollback.
2189             auto path = GetRollbackIndicatorPath();
2190             if (!android::base::WriteStringToFile("1", path)) {
2191                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
2192             } else {
2193                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
2194             }
2195         }
2196         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
2197         return false;
2198     }
2199 
2200     // If we can't read the update state, it's unlikely anything else will
2201     // succeed, so this is a fatal error. We'll eventually exhaust boot
2202     // attempts and revert to the old slot.
2203     auto lock = LockShared();
2204     if (!lock) {
2205         LOG(FATAL) << "Could not read update state to determine snapshot status";
2206         return false;
2207     }
2208     switch (ReadUpdateState(lock.get())) {
2209         case UpdateState::Unverified:
2210         case UpdateState::Merging:
2211         case UpdateState::MergeFailed:
2212             return true;
2213         default:
2214             return false;
2215     }
2216 }
2217 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)2218 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
2219         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
2220     LOG(INFO) << "Creating logical partitions with snapshots as needed";
2221 
2222     auto lock = LockExclusive();
2223     if (!lock) return false;
2224 
2225     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2226     return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
2227 }
2228 
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)2229 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
2230                                        uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
2231     const auto& opener = device_->GetPartitionOpener();
2232     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2233     if (!metadata) {
2234         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2235         return false;
2236     }
2237 
2238     if (!EnsureImageManager()) {
2239         return false;
2240     }
2241 
2242     for (const auto& partition : metadata->partitions) {
2243         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
2244             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
2245                       << kCowGroupName;
2246             continue;
2247         }
2248 
2249         CreateLogicalPartitionParams params = {
2250                 .block_device = super_device,
2251                 .metadata = metadata.get(),
2252                 .partition = &partition,
2253                 .partition_opener = &opener,
2254                 .timeout_ms = timeout_ms,
2255         };
2256         if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
2257             return false;
2258         }
2259     }
2260 
2261     LOG(INFO) << "Created logical partitions with snapshot.";
2262     return true;
2263 }
2264 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)2265 static std::chrono::milliseconds GetRemainingTime(
2266         const std::chrono::milliseconds& timeout,
2267         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
2268     // If no timeout is specified, execute all commands without specifying any timeout.
2269     if (timeout.count() == 0) return std::chrono::milliseconds(0);
2270     auto passed_time = std::chrono::steady_clock::now() - begin;
2271     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
2272     if (remaining_time.count() <= 0) {
2273         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
2274                    << remaining_time.count() << "ms remaining)";
2275         // Return min() instead of remaining_time here because 0 is treated as a special value for
2276         // no timeout, where the rest of the commands will still be executed.
2277         return std::chrono::milliseconds::min();
2278     }
2279     return remaining_time;
2280 }
2281 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)2282 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
2283                                                CreateLogicalPartitionParams params,
2284                                                SnapshotContext context, SnapshotPaths* paths) {
2285     auto begin = std::chrono::steady_clock::now();
2286 
2287     CHECK(lock);
2288 
2289     if (params.GetPartitionName() != params.GetDeviceName()) {
2290         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
2291                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
2292         return false;
2293     }
2294 
2295     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2296     // reading super partition metadata).
2297     CreateLogicalPartitionParams::OwnedData params_owned_data;
2298     if (!params.InitDefaults(&params_owned_data)) {
2299         return false;
2300     }
2301 
2302     if (!params.partition->num_extents) {
2303         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2304         return true;  // leave path empty to indicate that nothing is mapped.
2305     }
2306 
2307     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2308     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
2309     // completed, live_snapshot_status is set to nullopt.
2310     std::optional<SnapshotStatus> live_snapshot_status;
2311     do {
2312         if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2313             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2314                       << params.GetPartitionName();
2315             break;
2316         }
2317         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2318         if (access(file_path.c_str(), F_OK) != 0) {
2319             if (errno != ENOENT) {
2320                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2321                            << ": Can't access " << file_path;
2322                 return false;
2323             }
2324             break;
2325         }
2326         live_snapshot_status = std::make_optional<SnapshotStatus>();
2327         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2328             return false;
2329         }
2330         // No live snapshot if merge is completed.
2331         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2332             live_snapshot_status.reset();
2333         }
2334 
2335         if (live_snapshot_status->state() == SnapshotState::NONE ||
2336             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2337                     0) {
2338             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2339                          << " is invalid, ignoring: state = "
2340                          << SnapshotState_Name(live_snapshot_status->state())
2341                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2342                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2343             live_snapshot_status.reset();
2344         }
2345     } while (0);
2346 
2347     if (live_snapshot_status.has_value()) {
2348         // dm-snapshot requires the base device to be writable.
2349         params.force_writable = true;
2350         // Map the base device with a different name to avoid collision.
2351         params.device_name = GetBaseDeviceName(params.GetPartitionName());
2352     }
2353 
2354     AutoDeviceList created_devices;
2355 
2356     // Create the base device for the snapshot, or if there is no snapshot, the
2357     // device itself. This device consists of the real blocks in the super
2358     // partition that this logical partition occupies.
2359     std::string base_path;
2360     if (!CreateLogicalPartition(params, &base_path)) {
2361         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2362                    << " as device " << params.GetDeviceName();
2363         return false;
2364     }
2365     created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, params.GetDeviceName());
2366 
2367     if (paths) {
2368         paths->target_device = base_path;
2369     }
2370 
2371     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2372     if (remaining_time.count() < 0) {
2373         return false;
2374     }
2375 
2376     // Wait for the base device to appear
2377     if (!WaitForDevice(base_path, remaining_time)) {
2378         return false;
2379     }
2380 
2381     if (!live_snapshot_status.has_value()) {
2382         created_devices.Release();
2383         return true;
2384     }
2385 
2386     // We don't have ueventd in first-stage init, so use device major:minor
2387     // strings instead.
2388     std::string base_device;
2389     if (!dm_.GetDeviceString(params.GetDeviceName(), &base_device)) {
2390         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2391         return false;
2392     }
2393 
2394     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2395     if (remaining_time.count() < 0) return false;
2396 
2397     std::string cow_name;
2398     CreateLogicalPartitionParams cow_params = params;
2399     cow_params.timeout_ms = remaining_time;
2400     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2401         return false;
2402     }
2403     std::string cow_device;
2404     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2405         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2406         return false;
2407     }
2408     if (paths) {
2409         paths->cow_device_name = cow_name;
2410     }
2411 
2412     remaining_time = GetRemainingTime(params.timeout_ms, begin);
2413     if (remaining_time.count() < 0) return false;
2414 
2415     if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2416         // Stop here, we can't run dm-user yet, the COW isn't built.
2417         created_devices.Release();
2418         return true;
2419     }
2420 
2421     if (live_snapshot_status->compression_enabled()) {
2422         // Get the source device (eg the view of the partition from before it was resized).
2423         std::string source_device_path;
2424         if (live_snapshot_status->old_partition_size() > 0) {
2425             if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2426                                  &source_device_path)) {
2427                 LOG(ERROR) << "Could not map source device for: " << cow_name;
2428                 return false;
2429             }
2430 
2431             auto source_device = GetSourceDeviceName(params.GetPartitionName());
2432             created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, source_device);
2433         } else {
2434             source_device_path = base_path;
2435         }
2436 
2437         if (!WaitForDevice(source_device_path, remaining_time)) {
2438             return false;
2439         }
2440 
2441         std::string cow_path;
2442         if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2443             LOG(ERROR) << "Could not determine path for: " << cow_name;
2444             return false;
2445         }
2446         if (!WaitForDevice(cow_path, remaining_time)) {
2447             return false;
2448         }
2449 
2450         auto name = GetDmUserCowName(params.GetPartitionName(), GetSnapshotDriver(lock));
2451 
2452         std::string new_cow_device;
2453         if (!MapDmUserCow(lock, name, cow_path, source_device_path, base_path, remaining_time,
2454                           &new_cow_device)) {
2455             LOG(ERROR) << "Could not map dm-user device for partition "
2456                        << params.GetPartitionName();
2457             return false;
2458         }
2459         created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, name);
2460 
2461         remaining_time = GetRemainingTime(params.timeout_ms, begin);
2462         if (remaining_time.count() < 0) return false;
2463 
2464         cow_device = new_cow_device;
2465     }
2466 
2467     // For userspace snapshots, dm-user block device itself will act as a
2468     // snapshot device. There is one subtle difference - MapSnapshot will create
2469     // either snapshot target or snapshot-merge target based on the underlying
2470     // state of the snapshot device. If snapshot-merge target is created, merge
2471     // will immediately start in the kernel.
2472     //
2473     // This is no longer true with respect to userspace snapshots. When dm-user
2474     // block device is created, we just have the snapshots ready but daemon in
2475     // the user-space will not start the merge. We have to explicitly inform the
2476     // daemon to resume the merge. Check ProcessUpdateState() call stack.
2477     if (!UpdateUsesUserSnapshots(lock)) {
2478         std::string path;
2479         if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2480                          &path)) {
2481             LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2482             return false;
2483         }
2484         // No need to add params.GetPartitionName() to created_devices since it is immediately
2485         // released.
2486 
2487         if (paths) {
2488             paths->snapshot_device = path;
2489         }
2490         LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2491     } else {
2492         LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at "
2493                   << cow_device;
2494     }
2495 
2496     created_devices.Release();
2497 
2498     return true;
2499 }
2500 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2501 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2502                                                  const std::string& target_partition_name) {
2503     CHECK(lock);
2504 
2505     if (!UnmapSnapshot(lock, target_partition_name)) {
2506         return false;
2507     }
2508 
2509     if (!UnmapCowDevices(lock, target_partition_name)) {
2510         return false;
2511     }
2512 
2513     auto base_name = GetBaseDeviceName(target_partition_name);
2514     if (!DeleteDeviceIfExists(base_name)) {
2515         LOG(ERROR) << "Cannot delete base device: " << base_name;
2516         return false;
2517     }
2518 
2519     auto source_name = GetSourceDeviceName(target_partition_name);
2520     if (!DeleteDeviceIfExists(source_name)) {
2521         LOG(ERROR) << "Cannot delete source device: " << source_name;
2522         return false;
2523     }
2524 
2525     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2526 
2527     return true;
2528 }
2529 
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2530 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2531                                     const SnapshotStatus& snapshot_status,
2532                                     AutoDeviceList* created_devices, std::string* cow_name) {
2533     CHECK(lock);
2534     CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2535     auto begin = std::chrono::steady_clock::now();
2536 
2537     std::string partition_name = params.GetPartitionName();
2538     std::string cow_image_name = GetCowImageDeviceName(partition_name);
2539     *cow_name = GetCowName(partition_name);
2540 
2541     // Map COW image if necessary.
2542     if (snapshot_status.cow_file_size() > 0) {
2543         if (!EnsureImageManager()) return false;
2544         auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2545         if (remaining_time.count() < 0) return false;
2546 
2547         if (!MapCowImage(partition_name, remaining_time).has_value()) {
2548             LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2549             return false;
2550         }
2551         created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2552 
2553         // If no COW partition exists, just return the image alone.
2554         if (snapshot_status.cow_partition_size() == 0) {
2555             *cow_name = std::move(cow_image_name);
2556             LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2557             return true;
2558         }
2559     }
2560 
2561     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2562     if (remaining_time.count() < 0) return false;
2563 
2564     CHECK(snapshot_status.cow_partition_size() > 0);
2565 
2566     // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2567     // COW image device as the last extent.
2568     CreateLogicalPartitionParams cow_partition_params = params;
2569     cow_partition_params.partition = nullptr;
2570     cow_partition_params.partition_name = *cow_name;
2571     cow_partition_params.device_name.clear();
2572     DmTable table;
2573     if (!CreateDmTable(cow_partition_params, &table)) {
2574         return false;
2575     }
2576     // If the COW image exists, append it as the last extent.
2577     if (snapshot_status.cow_file_size() > 0) {
2578         std::string cow_image_device;
2579         if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2580             LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2581             return false;
2582         }
2583         auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2584         auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2585         table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2586                                       0);
2587     }
2588 
2589     // We have created the DmTable now. Map it.
2590     std::string cow_path;
2591     if (!dm_.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2592         LOG(ERROR) << "Could not create COW device: " << *cow_name;
2593         return false;
2594     }
2595     created_devices->EmplaceBack<AutoUnmapDevice>(&dm_, *cow_name);
2596     LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2597     return true;
2598 }
2599 
UnmapCowDevices(LockedFile * lock,const std::string & name)2600 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2601     CHECK(lock);
2602     if (!EnsureImageManager()) return false;
2603 
2604     if (UpdateUsesCompression(lock) && !UpdateUsesUserSnapshots(lock)) {
2605         auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
2606         if (!UnmapDmUserDevice(dm_user_name)) {
2607             return false;
2608         }
2609     }
2610 
2611     if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2612         LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2613         return false;
2614     }
2615 
2616     std::string cow_image_name = GetCowImageDeviceName(name);
2617     if (!images_->UnmapImageIfExists(cow_image_name)) {
2618         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2619         return false;
2620     }
2621     return true;
2622 }
2623 
UnmapDmUserDevice(const std::string & dm_user_name)2624 bool SnapshotManager::UnmapDmUserDevice(const std::string& dm_user_name) {
2625     if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2626         return true;
2627     }
2628 
2629     if (!DeleteDeviceIfExists(dm_user_name)) {
2630         LOG(ERROR) << "Cannot unmap " << dm_user_name;
2631         return false;
2632     }
2633 
2634     if (EnsureSnapuserdConnected()) {
2635         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2636             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2637             return false;
2638         }
2639     }
2640 
2641     // Ensure the control device is gone so we don't run into ABA problems.
2642     auto control_device = "/dev/dm-user/" + dm_user_name;
2643     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2644         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2645         return false;
2646     }
2647     return true;
2648 }
2649 
UnmapUserspaceSnapshotDevice(LockedFile * lock,const std::string & snapshot_name)2650 bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
2651                                                    const std::string& snapshot_name) {
2652     auto dm_user_name = GetDmUserCowName(snapshot_name, GetSnapshotDriver(lock));
2653     if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2654         return true;
2655     }
2656 
2657     CHECK(lock);
2658 
2659     SnapshotStatus snapshot_status;
2660 
2661     if (!ReadSnapshotStatus(lock, snapshot_name, &snapshot_status)) {
2662         return false;
2663     }
2664     // If the merge is complete, then we switch dm tables which is equivalent
2665     // to unmap; hence, we can't be deleting the device
2666     // as the table would be mounted off partitions and will fail.
2667     if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
2668         if (!DeleteDeviceIfExists(dm_user_name)) {
2669             LOG(ERROR) << "Cannot unmap " << dm_user_name;
2670             return false;
2671         }
2672     }
2673 
2674     if (EnsureSnapuserdConnected()) {
2675         if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2676             LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2677             return false;
2678         }
2679     }
2680 
2681     // Ensure the control device is gone so we don't run into ABA problems.
2682     auto control_device = "/dev/dm-user/" + dm_user_name;
2683     if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2684         LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2685         return false;
2686     }
2687     return true;
2688 }
2689 
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2690 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2691     auto lock = LockExclusive();
2692     if (!lock) return false;
2693 
2694     auto state = ReadUpdateState(lock.get());
2695     if (state == UpdateState::Unverified) {
2696         if (GetCurrentSlot() == Slot::Target) {
2697             LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2698             return false;
2699         }
2700     } else if (state != UpdateState::Initiated) {
2701         LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2702         return false;
2703     }
2704 
2705     std::vector<std::string> snapshots;
2706     if (!ListSnapshots(lock.get(), &snapshots)) {
2707         return false;
2708     }
2709 
2710     const auto& opener = device_->GetPartitionOpener();
2711     auto slot_suffix = device_->GetOtherSlotSuffix();
2712     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2713     auto super_device = device_->GetSuperDevice(slot_number);
2714     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2715     if (!metadata) {
2716         LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2717                    << super_device;
2718         return false;
2719     }
2720 
2721     for (const auto& snapshot : snapshots) {
2722         if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2723             LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2724             return false;
2725         }
2726 
2727         CreateLogicalPartitionParams params = {
2728                 .block_device = super_device,
2729                 .metadata = metadata.get(),
2730                 .partition_name = snapshot,
2731                 .partition_opener = &opener,
2732                 .timeout_ms = timeout_ms,
2733         };
2734         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2735                                       nullptr)) {
2736             LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2737             return false;
2738         }
2739     }
2740 
2741     LOG(INFO) << "MapAllSnapshots succeeded.";
2742     return true;
2743 }
2744 
UnmapAllSnapshots()2745 bool SnapshotManager::UnmapAllSnapshots() {
2746     auto lock = LockExclusive();
2747     if (!lock) return false;
2748 
2749     return UnmapAllSnapshots(lock.get());
2750 }
2751 
UnmapAllSnapshots(LockedFile * lock)2752 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2753     std::vector<std::string> snapshots;
2754     if (!ListSnapshots(lock, &snapshots)) {
2755         return false;
2756     }
2757 
2758     for (const auto& snapshot : snapshots) {
2759         if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2760             LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2761             return false;
2762         }
2763     }
2764 
2765     // Terminate the daemon and release the snapuserd_client_ object.
2766     // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2767     // will re-create the object and establish the socket connection.
2768     if (snapuserd_client_) {
2769         LOG(INFO) << "Shutdown snapuserd daemon";
2770         snapuserd_client_->DetachSnapuserd();
2771         snapuserd_client_->CloseConnection();
2772         snapuserd_client_ = nullptr;
2773     }
2774 
2775     return true;
2776 }
2777 
OpenFile(const std::string & file,int lock_flags)2778 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2779         -> std::unique_ptr<LockedFile> {
2780     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2781     if (fd < 0) {
2782         PLOG(ERROR) << "Open failed: " << file;
2783         return nullptr;
2784     }
2785     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2786         PLOG(ERROR) << "Acquire flock failed: " << file;
2787         return nullptr;
2788     }
2789     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2790     // calls, so strip extra flags.
2791     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2792     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2793 }
2794 
~LockedFile()2795 SnapshotManager::LockedFile::~LockedFile() {
2796     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2797         PLOG(ERROR) << "Failed to unlock file: " << path_;
2798     }
2799 }
2800 
GetStateFilePath() const2801 std::string SnapshotManager::GetStateFilePath() const {
2802     return metadata_dir_ + "/state"s;
2803 }
2804 
GetMergeStateFilePath() const2805 std::string SnapshotManager::GetMergeStateFilePath() const {
2806     return metadata_dir_ + "/merge_state"s;
2807 }
2808 
GetLockPath() const2809 std::string SnapshotManager::GetLockPath() const {
2810     return metadata_dir_;
2811 }
2812 
OpenLock(int lock_flags)2813 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2814     auto lock_file = GetLockPath();
2815     return OpenFile(lock_file, lock_flags);
2816 }
2817 
LockShared()2818 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2819     return OpenLock(LOCK_SH);
2820 }
2821 
LockExclusive()2822 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2823     return OpenLock(LOCK_EX);
2824 }
2825 
UpdateStateFromString(const std::string & contents)2826 static UpdateState UpdateStateFromString(const std::string& contents) {
2827     if (contents.empty() || contents == "none") {
2828         return UpdateState::None;
2829     } else if (contents == "initiated") {
2830         return UpdateState::Initiated;
2831     } else if (contents == "unverified") {
2832         return UpdateState::Unverified;
2833     } else if (contents == "merging") {
2834         return UpdateState::Merging;
2835     } else if (contents == "merge-completed") {
2836         return UpdateState::MergeCompleted;
2837     } else if (contents == "merge-needs-reboot") {
2838         return UpdateState::MergeNeedsReboot;
2839     } else if (contents == "merge-failed") {
2840         return UpdateState::MergeFailed;
2841     } else if (contents == "cancelled") {
2842         return UpdateState::Cancelled;
2843     } else {
2844         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2845         return UpdateState::None;
2846     }
2847 }
2848 
operator <<(std::ostream & os,UpdateState state)2849 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2850     switch (state) {
2851         case UpdateState::None:
2852             return os << "none";
2853         case UpdateState::Initiated:
2854             return os << "initiated";
2855         case UpdateState::Unverified:
2856             return os << "unverified";
2857         case UpdateState::Merging:
2858             return os << "merging";
2859         case UpdateState::MergeCompleted:
2860             return os << "merge-completed";
2861         case UpdateState::MergeNeedsReboot:
2862             return os << "merge-needs-reboot";
2863         case UpdateState::MergeFailed:
2864             return os << "merge-failed";
2865         case UpdateState::Cancelled:
2866             return os << "cancelled";
2867         default:
2868             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2869             return os;
2870     }
2871 }
2872 
ReadUpdateState(LockedFile * lock)2873 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2874     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2875     return status.state();
2876 }
2877 
ReadSnapshotUpdateStatus(LockedFile * lock)2878 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2879     CHECK(lock);
2880 
2881     SnapshotUpdateStatus status = {};
2882     std::string contents;
2883     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2884         PLOG(ERROR) << "Read state file failed";
2885         status.set_state(UpdateState::None);
2886         return status;
2887     }
2888 
2889     if (!status.ParseFromString(contents)) {
2890         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2891 
2892         // Try to rollback to legacy file to support devices that are
2893         // currently using the old file format.
2894         // TODO(b/147409432)
2895         status.set_state(UpdateStateFromString(contents));
2896     }
2897 
2898     return status;
2899 }
2900 
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2901 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2902                                        MergeFailureCode failure_code) {
2903     SnapshotUpdateStatus status;
2904     status.set_state(state);
2905 
2906     switch (state) {
2907         case UpdateState::MergeFailed:
2908             status.set_merge_failure_code(failure_code);
2909             break;
2910         case UpdateState::Initiated:
2911             status.set_source_build_fingerprint(
2912                     android::base::GetProperty("ro.build.fingerprint", ""));
2913             break;
2914         default:
2915             break;
2916     }
2917 
2918     // If we're transitioning between two valid states (eg, we're not beginning
2919     // or ending an OTA), then make sure to propagate the compression bit and
2920     // build fingerprint.
2921     if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2922         SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2923         status.set_compression_enabled(old_status.compression_enabled());
2924         status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2925         status.set_merge_phase(old_status.merge_phase());
2926         status.set_userspace_snapshots(old_status.userspace_snapshots());
2927         status.set_io_uring_enabled(old_status.io_uring_enabled());
2928     }
2929     return WriteSnapshotUpdateStatus(lock, status);
2930 }
2931 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2932 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2933                                                 const SnapshotUpdateStatus& status) {
2934     CHECK(lock);
2935     CHECK(lock->lock_mode() == LOCK_EX);
2936 
2937     std::string contents;
2938     if (!status.SerializeToString(&contents)) {
2939         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2940         return false;
2941     }
2942 
2943 #ifdef LIBSNAPSHOT_USE_HAL
2944     auto merge_status = MergeStatus::UNKNOWN;
2945     switch (status.state()) {
2946         // The needs-reboot and completed cases imply that /data and /metadata
2947         // can be safely wiped, so we don't report a merge status.
2948         case UpdateState::None:
2949         case UpdateState::MergeNeedsReboot:
2950         case UpdateState::MergeCompleted:
2951         case UpdateState::Initiated:
2952             merge_status = MergeStatus::NONE;
2953             break;
2954         case UpdateState::Unverified:
2955             merge_status = MergeStatus::SNAPSHOTTED;
2956             break;
2957         case UpdateState::Merging:
2958         case UpdateState::MergeFailed:
2959             merge_status = MergeStatus::MERGING;
2960             break;
2961         default:
2962             // Note that Cancelled flows to here - it is never written, since
2963             // it only communicates a transient state to the caller.
2964             LOG(ERROR) << "Unexpected update status: " << status.state();
2965             break;
2966     }
2967 
2968     bool set_before_write =
2969             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2970     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2971         return false;
2972     }
2973 #endif
2974 
2975     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2976         PLOG(ERROR) << "Could not write to state file";
2977         return false;
2978     }
2979 
2980 #ifdef LIBSNAPSHOT_USE_HAL
2981     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2982         return false;
2983     }
2984 #endif
2985     return true;
2986 }
2987 
GetSnapshotStatusFilePath(const std::string & name)2988 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2989     auto file = metadata_dir_ + "/snapshots/"s + name;
2990     return file;
2991 }
2992 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2993 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2994                                          SnapshotStatus* status) {
2995     CHECK(lock);
2996     auto path = GetSnapshotStatusFilePath(name);
2997 
2998     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2999     if (fd < 0) {
3000         PLOG(ERROR) << "Open failed: " << path;
3001         return false;
3002     }
3003 
3004     if (!status->ParseFromFileDescriptor(fd.get())) {
3005         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
3006         return false;
3007     }
3008 
3009     if (status->name() != name) {
3010         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
3011         status->set_name(name);
3012     }
3013 
3014     return true;
3015 }
3016 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)3017 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
3018     // The caller must take an exclusive lock to modify snapshots.
3019     CHECK(lock);
3020     CHECK(lock->lock_mode() == LOCK_EX);
3021     CHECK(!status.name().empty());
3022 
3023     auto path = GetSnapshotStatusFilePath(status.name());
3024 
3025     std::string content;
3026     if (!status.SerializeToString(&content)) {
3027         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
3028         return false;
3029     }
3030 
3031     if (!WriteStringToFileAtomic(content, path)) {
3032         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
3033         return false;
3034     }
3035 
3036     return true;
3037 }
3038 
EnsureImageManager()3039 bool SnapshotManager::EnsureImageManager() {
3040     if (images_) return true;
3041 
3042     images_ = device_->OpenImageManager();
3043     if (!images_) {
3044         LOG(ERROR) << "Could not open ImageManager";
3045         return false;
3046     }
3047     return true;
3048 }
3049 
EnsureSnapuserdConnected()3050 bool SnapshotManager::EnsureSnapuserdConnected() {
3051     if (snapuserd_client_) {
3052         return true;
3053     }
3054 
3055     if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
3056         return false;
3057     }
3058 
3059     snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
3060     if (!snapuserd_client_) {
3061         LOG(ERROR) << "Unable to connect to snapuserd";
3062         return false;
3063     }
3064     return true;
3065 }
3066 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)3067 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
3068     std::vector<std::string> to_delete;
3069     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
3070         if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
3071             LOG(WARNING) << existing_cow_partition->name()
3072                          << " cannot be unmapped and its space cannot be reclaimed";
3073             continue;
3074         }
3075         to_delete.push_back(existing_cow_partition->name());
3076     }
3077     for (const auto& name : to_delete) {
3078         current_metadata->RemovePartition(name);
3079     }
3080 }
3081 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3082 static Return AddRequiredSpace(Return orig,
3083                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3084     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
3085         return orig;
3086     }
3087     uint64_t sum = 0;
3088     for (auto&& [name, status] : all_snapshot_status) {
3089         sum += status.cow_file_size();
3090     }
3091     return Return::NoSpace(sum);
3092 }
3093 
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)3094 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
3095     auto lock = LockExclusive();
3096     if (!lock) return Return::Error();
3097 
3098     auto update_state = ReadUpdateState(lock.get());
3099     if (update_state != UpdateState::Initiated) {
3100         LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
3101         return Return::Error();
3102     }
3103 
3104     // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
3105     // partition takes up a big chunk of space in super, causing COW images to be created on
3106     // retrofit Virtual A/B devices.
3107     if (device_->IsOverlayfsSetup()) {
3108         LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
3109                    << ", reboot, then try again.";
3110         return Return::Error();
3111     }
3112 
3113     const auto& opener = device_->GetPartitionOpener();
3114     auto current_suffix = device_->GetSlotSuffix();
3115     uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
3116     auto target_suffix = device_->GetOtherSlotSuffix();
3117     uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
3118     auto current_super = device_->GetSuperDevice(current_slot);
3119 
3120     auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
3121     if (current_metadata == nullptr) {
3122         LOG(ERROR) << "Cannot create metadata builder.";
3123         return Return::Error();
3124     }
3125 
3126     auto target_metadata =
3127             MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
3128     if (target_metadata == nullptr) {
3129         LOG(ERROR) << "Cannot create target metadata builder.";
3130         return Return::Error();
3131     }
3132 
3133     // Delete partitions with target suffix in |current_metadata|. Otherwise,
3134     // partition_cow_creator recognizes these left-over partitions as used space.
3135     for (const auto& group_name : current_metadata->ListGroups()) {
3136         if (android::base::EndsWith(group_name, target_suffix)) {
3137             current_metadata->RemoveGroupAndPartitions(group_name);
3138         }
3139     }
3140 
3141     SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
3142     if (!metadata_updater.Update()) {
3143         LOG(ERROR) << "Cannot calculate new metadata.";
3144         return Return::Error();
3145     }
3146 
3147     // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
3148     // free regions.
3149     UnmapAndDeleteCowPartition(current_metadata.get());
3150 
3151     // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
3152     // devices with retrofit dynamic partitions does not make sense.
3153     // This ensures that current_metadata->GetFreeRegions() uses the same device
3154     // indices as target_metadata (i.e. 0 -> "super").
3155     // This is also assumed in MapCowDevices() call below.
3156     CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
3157           target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
3158 
3159     std::map<std::string, SnapshotStatus> all_snapshot_status;
3160 
3161     // In case of error, automatically delete devices that are created along the way.
3162     // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
3163     // these devices.
3164     AutoDeviceList created_devices;
3165 
3166     const auto& dap_metadata = manifest.dynamic_partition_metadata();
3167     CowOptions options;
3168     CowWriter writer(options);
3169     bool cow_format_support = true;
3170     if (dap_metadata.cow_version() < writer.GetCowVersion()) {
3171         cow_format_support = false;
3172     }
3173 
3174     LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
3175               << " writer.GetCowVersion(): " << writer.GetCowVersion();
3176 
3177     bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
3178                            !device_->IsRecovery() && cow_format_support;
3179 
3180     std::string compression_algorithm;
3181     if (use_compression) {
3182         compression_algorithm = dap_metadata.vabc_compression_param();
3183         if (compression_algorithm.empty()) {
3184             // Older OTAs don't set an explicit compression type, so default to gz.
3185             compression_algorithm = "gz";
3186         }
3187     } else {
3188         compression_algorithm = "none";
3189     }
3190 
3191     PartitionCowCreator cow_creator{
3192             .target_metadata = target_metadata.get(),
3193             .target_suffix = target_suffix,
3194             .target_partition = nullptr,
3195             .current_metadata = current_metadata.get(),
3196             .current_suffix = current_suffix,
3197             .update = nullptr,
3198             .extra_extents = {},
3199             .compression_enabled = use_compression,
3200             .compression_algorithm = compression_algorithm,
3201     };
3202 
3203     auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
3204                                              &all_snapshot_status);
3205     if (!ret.is_ok()) return ret;
3206 
3207     auto exported_target_metadata = target_metadata->Export();
3208     if (exported_target_metadata == nullptr) {
3209         LOG(ERROR) << "Cannot export target metadata";
3210         return Return::Error();
3211     }
3212 
3213     ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
3214                                     exported_target_metadata.get(), target_suffix,
3215                                     all_snapshot_status);
3216     if (!ret.is_ok()) return ret;
3217 
3218     if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
3219                               *exported_target_metadata, target_slot)) {
3220         LOG(ERROR) << "Cannot write target metadata";
3221         return Return::Error();
3222     }
3223 
3224     // If compression is enabled, we need to retain a copy of the old metadata
3225     // so we can access original blocks in case they are moved around. We do
3226     // not want to rely on the old super metadata slot because we don't
3227     // guarantee its validity after the slot switch is successful.
3228     if (cow_creator.compression_enabled) {
3229         auto metadata = current_metadata->Export();
3230         if (!metadata) {
3231             LOG(ERROR) << "Could not export current metadata";
3232             return Return::Error();
3233         }
3234 
3235         auto path = GetOldPartitionMetadataPath();
3236         if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
3237             LOG(ERROR) << "Cannot write old metadata to " << path;
3238             return Return::Error();
3239         }
3240     }
3241 
3242     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3243     status.set_state(update_state);
3244     status.set_compression_enabled(cow_creator.compression_enabled);
3245     if (cow_creator.compression_enabled) {
3246         if (!device()->IsTestDevice()) {
3247             bool userSnapshotsEnabled = IsUserspaceSnapshotsEnabled();
3248             const std::string UNKNOWN = "unknown";
3249             const std::string vendor_release = android::base::GetProperty(
3250                     "ro.vendor.build.version.release_or_codename", UNKNOWN);
3251 
3252             // No user-space snapshots if vendor partition is on Android 12
3253             if (vendor_release.find("12") != std::string::npos) {
3254                 LOG(INFO) << "Userspace snapshots disabled as vendor partition is on Android: "
3255                           << vendor_release;
3256                 userSnapshotsEnabled = false;
3257             }
3258 
3259             // Userspace snapshots is enabled only if compression is enabled
3260             status.set_userspace_snapshots(userSnapshotsEnabled);
3261             if (userSnapshotsEnabled) {
3262                 is_snapshot_userspace_ = true;
3263                 status.set_io_uring_enabled(IsIouringEnabled());
3264                 LOG(INFO) << "Userspace snapshots enabled";
3265             } else {
3266                 is_snapshot_userspace_ = false;
3267                 LOG(INFO) << "Userspace snapshots disabled";
3268             }
3269 
3270             // Terminate stale daemon if any
3271             std::unique_ptr<SnapuserdClient> snapuserd_client =
3272                     SnapuserdClient::Connect(kSnapuserdSocket, 5s);
3273             if (snapuserd_client) {
3274                 snapuserd_client->DetachSnapuserd();
3275                 snapuserd_client->CloseConnection();
3276                 snapuserd_client = nullptr;
3277             }
3278 
3279             // Clear the cached client if any
3280             if (snapuserd_client_) {
3281                 snapuserd_client_->CloseConnection();
3282                 snapuserd_client_ = nullptr;
3283             }
3284         } else {
3285             bool userSnapshotsEnabled = true;
3286             const std::string UNKNOWN = "unknown";
3287             const std::string vendor_release = android::base::GetProperty(
3288                     "ro.vendor.build.version.release_or_codename", UNKNOWN);
3289 
3290             // No user-space snapshots if vendor partition is on Android 12
3291             if (vendor_release.find("12") != std::string::npos) {
3292                 LOG(INFO) << "Userspace snapshots disabled as vendor partition is on Android: "
3293                           << vendor_release;
3294                 userSnapshotsEnabled = false;
3295             }
3296 
3297             userSnapshotsEnabled = (userSnapshotsEnabled && !IsDmSnapshotTestingEnabled());
3298             status.set_userspace_snapshots(userSnapshotsEnabled);
3299             if (!userSnapshotsEnabled) {
3300                 is_snapshot_userspace_ = false;
3301                 LOG(INFO) << "User-space snapshots disabled for testing";
3302             } else {
3303                 is_snapshot_userspace_ = true;
3304                 LOG(INFO) << "User-space snapshots enabled for testing";
3305             }
3306         }
3307     }
3308     if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
3309         LOG(ERROR) << "Unable to write new update state";
3310         return Return::Error();
3311     }
3312 
3313     created_devices.Release();
3314     LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
3315 
3316     return Return::Ok();
3317 }
3318 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)3319 Return SnapshotManager::CreateUpdateSnapshotsInternal(
3320         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
3321         AutoDeviceList* created_devices,
3322         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
3323     CHECK(lock);
3324 
3325     auto* target_metadata = cow_creator->target_metadata;
3326     const auto& target_suffix = cow_creator->target_suffix;
3327 
3328     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
3329         LOG(ERROR) << "Cannot add group " << kCowGroupName;
3330         return Return::Error();
3331     }
3332 
3333     std::map<std::string, const PartitionUpdate*> partition_map;
3334     std::map<std::string, std::vector<Extent>> extra_extents_map;
3335     for (const auto& partition_update : manifest.partitions()) {
3336         auto suffixed_name = partition_update.partition_name() + target_suffix;
3337         auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
3338         if (!inserted) {
3339             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
3340                        << " in update manifest.";
3341             return Return::Error();
3342         }
3343 
3344         auto& extra_extents = extra_extents_map[suffixed_name];
3345         if (partition_update.has_hash_tree_extent()) {
3346             extra_extents.push_back(partition_update.hash_tree_extent());
3347         }
3348         if (partition_update.has_fec_extent()) {
3349             extra_extents.push_back(partition_update.fec_extent());
3350         }
3351     }
3352 
3353     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3354         cow_creator->target_partition = target_partition;
3355         cow_creator->update = nullptr;
3356         auto iter = partition_map.find(target_partition->name());
3357         if (iter != partition_map.end()) {
3358             cow_creator->update = iter->second;
3359         } else {
3360             LOG(INFO) << target_partition->name()
3361                       << " isn't included in the payload, skipping the cow creation.";
3362             continue;
3363         }
3364 
3365         cow_creator->extra_extents.clear();
3366         auto extra_extents_it = extra_extents_map.find(target_partition->name());
3367         if (extra_extents_it != extra_extents_map.end()) {
3368             cow_creator->extra_extents = std::move(extra_extents_it->second);
3369         }
3370 
3371         // Compute the device sizes for the partition.
3372         auto cow_creator_ret = cow_creator->Run();
3373         if (!cow_creator_ret.has_value()) {
3374             LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
3375             return Return::Error();
3376         }
3377 
3378         LOG(INFO) << "For partition " << target_partition->name()
3379                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
3380                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
3381                   << ", cow partition size = "
3382                   << cow_creator_ret->snapshot_status.cow_partition_size()
3383                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
3384 
3385         // Delete any existing snapshot before re-creating one.
3386         if (!DeleteSnapshot(lock, target_partition->name())) {
3387             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
3388                        << target_partition->name();
3389             return Return::Error();
3390         }
3391 
3392         // It is possible that the whole partition uses free space in super, and snapshot / COW
3393         // would not be needed. In this case, skip the partition.
3394         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
3395         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
3396                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
3397         CHECK(needs_snapshot == needs_cow);
3398 
3399         if (!needs_snapshot) {
3400             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
3401                       << "because nothing needs to be snapshotted.";
3402             continue;
3403         }
3404 
3405         // Find the original partition size.
3406         auto name = target_partition->name();
3407         auto old_partition_name =
3408                 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
3409         auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
3410         if (old_partition) {
3411             cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
3412         }
3413 
3414         // Store these device sizes to snapshot status file.
3415         if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
3416             return Return::Error();
3417         }
3418         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
3419 
3420         // Create the COW partition. That is, use any remaining free space in super partition before
3421         // creating the COW images.
3422         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3423             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3424                     << "cow_partition_size == "
3425                     << cow_creator_ret->snapshot_status.cow_partition_size()
3426                     << " is not a multiple of sector size " << kSectorSize;
3427             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3428                                                                kCowGroupName, 0 /* flags */);
3429             if (cow_partition == nullptr) {
3430                 return Return::Error();
3431             }
3432 
3433             if (!target_metadata->ResizePartition(
3434                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3435                         cow_creator_ret->cow_partition_usable_regions)) {
3436                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3437                            << cow_creator_ret->snapshot_status.cow_partition_size();
3438                 return Return::Error();
3439             }
3440             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3441             // error in the future.
3442         }
3443 
3444         all_snapshot_status->emplace(target_partition->name(),
3445                                      std::move(cow_creator_ret->snapshot_status));
3446 
3447         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3448     }
3449 
3450     LOG(INFO) << "Allocating CoW images.";
3451 
3452     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3453         // Create the backing COW image if necessary.
3454         if (snapshot_status.cow_file_size() > 0) {
3455             auto ret = CreateCowImage(lock, name);
3456             if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3457         }
3458 
3459         LOG(INFO) << "Successfully created snapshot for " << name;
3460     }
3461 
3462     return Return::Ok();
3463 }
3464 
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3465 Return SnapshotManager::InitializeUpdateSnapshots(
3466         LockedFile* lock, MetadataBuilder* target_metadata,
3467         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3468         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3469     CHECK(lock);
3470 
3471     CreateLogicalPartitionParams cow_params{
3472             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3473             .metadata = exported_target_metadata,
3474             .timeout_ms = std::chrono::milliseconds::max(),
3475             .partition_opener = &device_->GetPartitionOpener(),
3476     };
3477     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3478         AutoDeviceList created_devices_for_cow;
3479 
3480         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3481             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3482                        << target_partition->name();
3483             return Return::Error();
3484         }
3485 
3486         auto it = all_snapshot_status.find(target_partition->name());
3487         if (it == all_snapshot_status.end()) continue;
3488         cow_params.partition_name = target_partition->name();
3489         std::string cow_name;
3490         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3491             return Return::Error();
3492         }
3493 
3494         std::string cow_path;
3495         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3496             LOG(ERROR) << "Cannot determine path for " << cow_name;
3497             return Return::Error();
3498         }
3499 
3500         if (it->second.compression_enabled()) {
3501             unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3502             if (fd < 0) {
3503                 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3504                             << cow_params.partition_name;
3505                 return Return::Error();
3506             }
3507 
3508             CowOptions options;
3509             if (device()->IsTestDevice()) {
3510                 options.scratch_space = false;
3511             }
3512             options.compression = it->second.compression_algorithm();
3513 
3514             CowWriter writer(options);
3515             if (!writer.Initialize(fd) || !writer.Finalize()) {
3516                 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3517                 return Return::Error();
3518             }
3519         } else {
3520             auto ret = InitializeKernelCow(cow_path);
3521             if (!ret.is_ok()) {
3522                 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3523                            << cow_path;
3524                 return AddRequiredSpace(ret, all_snapshot_status);
3525             }
3526         }
3527         // Let destructor of created_devices_for_cow to unmap the COW devices.
3528     };
3529     return Return::Ok();
3530 }
3531 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3532 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3533                                         std::string* snapshot_path) {
3534     auto lock = LockShared();
3535     if (!lock) return false;
3536     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3537         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3538                    << params.GetPartitionName();
3539         return false;
3540     }
3541 
3542     SnapshotStatus status;
3543     if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3544         return false;
3545     }
3546     if (status.compression_enabled()) {
3547         LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3548         return false;
3549     }
3550 
3551     SnapshotPaths paths;
3552     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3553         return false;
3554     }
3555 
3556     if (!paths.snapshot_device.empty()) {
3557         *snapshot_path = paths.snapshot_device;
3558     } else {
3559         *snapshot_path = paths.target_device;
3560     }
3561     DCHECK(!snapshot_path->empty());
3562     return true;
3563 }
3564 
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3565 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3566         const android::fs_mgr::CreateLogicalPartitionParams& params,
3567         const std::optional<std::string>& source_device) {
3568 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3569     (void)params;
3570     (void)source_device;
3571 
3572     LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3573     return nullptr;
3574 #else
3575     // First unmap any existing mapping.
3576     auto lock = LockShared();
3577     if (!lock) return nullptr;
3578     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3579         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3580                    << params.GetPartitionName();
3581         return nullptr;
3582     }
3583 
3584     SnapshotPaths paths;
3585     if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3586         return nullptr;
3587     }
3588 
3589     SnapshotStatus status;
3590     if (!paths.cow_device_name.empty()) {
3591         if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3592             return nullptr;
3593         }
3594     } else {
3595         // Currently, partition_cow_creator always creates snapshots. The
3596         // reason is that if partition X shrinks while partition Y grows, we
3597         // cannot bindly write to the newly freed extents in X. This would
3598         // make the old slot unusable. So, the entire size of the target
3599         // partition is currently considered snapshottable.
3600         LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3601         return nullptr;
3602     }
3603 
3604     if (status.compression_enabled()) {
3605         return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3606                                             status, paths);
3607     }
3608     return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3609                                     paths);
3610 #endif
3611 }
3612 
3613 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3614 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3615         LockedFile* lock, const std::optional<std::string>& source_device,
3616         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3617         const SnapshotPaths& paths) {
3618     CHECK(lock);
3619 
3620     CowOptions cow_options;
3621     cow_options.compression = status.compression_algorithm();
3622     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3623     // Disable scratch space for vts tests
3624     if (device()->IsTestDevice()) {
3625         cow_options.scratch_space = false;
3626     }
3627 
3628     // Currently we don't support partial snapshots, since partition_cow_creator
3629     // never creates this scenario.
3630     CHECK(status.snapshot_size() == status.device_size());
3631 
3632     auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3633     if (source_device) {
3634         writer->SetSourceDevice(*source_device);
3635     }
3636 
3637     std::string cow_path;
3638     if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3639         LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3640         return nullptr;
3641     }
3642 
3643     unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3644     if (cow_fd < 0) {
3645         PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3646         return nullptr;
3647     }
3648     if (!writer->SetCowDevice(std::move(cow_fd))) {
3649         LOG(ERROR) << "Could not create COW writer from " << cow_path;
3650         return nullptr;
3651     }
3652 
3653     return writer;
3654 }
3655 
OpenKernelSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3656 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3657         LockedFile* lock, const std::optional<std::string>& source_device,
3658         [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3659         const SnapshotPaths& paths) {
3660     CHECK(lock);
3661 
3662     CowOptions cow_options;
3663     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3664 
3665     auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3666 
3667     std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3668     unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3669     if (fd < 0) {
3670         PLOG(ERROR) << "open failed: " << path;
3671         return nullptr;
3672     }
3673 
3674     if (source_device) {
3675         writer->SetSourceDevice(*source_device);
3676     }
3677 
3678     uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3679     writer->SetSnapshotDevice(std::move(fd), cow_size);
3680 
3681     return writer;
3682 }
3683 #endif  // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3684 
UnmapUpdateSnapshot(const std::string & target_partition_name)3685 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3686     auto lock = LockShared();
3687     if (!lock) return false;
3688     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3689 }
3690 
UnmapAllPartitionsInRecovery()3691 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3692     auto lock = LockExclusive();
3693     if (!lock) return false;
3694 
3695     const auto& opener = device_->GetPartitionOpener();
3696     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3697     auto super_device = device_->GetSuperDevice(slot);
3698     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3699     if (!metadata) {
3700         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3701         return false;
3702     }
3703 
3704     bool ok = true;
3705     for (const auto& partition : metadata->partitions) {
3706         auto partition_name = GetPartitionName(partition);
3707         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3708     }
3709     return ok;
3710 }
3711 
operator <<(std::ostream & os,SnapshotManager::Slot slot)3712 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3713     switch (slot) {
3714         case SnapshotManager::Slot::Unknown:
3715             return os << "unknown";
3716         case SnapshotManager::Slot::Source:
3717             return os << "source";
3718         case SnapshotManager::Slot::Target:
3719             return os << "target";
3720     }
3721 }
3722 
Dump(std::ostream & os)3723 bool SnapshotManager::Dump(std::ostream& os) {
3724     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3725     // if it is racy.
3726     auto file = OpenLock(0 /* lock flag */);
3727     if (!file) return false;
3728 
3729     std::stringstream ss;
3730 
3731     auto update_status = ReadSnapshotUpdateStatus(file.get());
3732 
3733     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3734     ss << "Compression: " << update_status.compression_enabled() << std::endl;
3735     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3736     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3737     ss << "Rollback indicator: "
3738        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3739        << std::endl;
3740     ss << "Forward merge indicator: "
3741        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3742        << std::endl;
3743     ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3744 
3745     bool ok = true;
3746     std::vector<std::string> snapshots;
3747     if (!ListSnapshots(file.get(), &snapshots)) {
3748         LOG(ERROR) << "Could not list snapshots";
3749         snapshots.clear();
3750         ok = false;
3751     }
3752     for (const auto& name : snapshots) {
3753         ss << "Snapshot: " << name << std::endl;
3754         SnapshotStatus status;
3755         if (!ReadSnapshotStatus(file.get(), name, &status)) {
3756             ok = false;
3757             continue;
3758         }
3759         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
3760         ss << "    device size (bytes): " << status.device_size() << std::endl;
3761         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
3762         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3763         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
3764         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
3765         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
3766         ss << "    compression: " << status.compression_algorithm() << std::endl;
3767     }
3768     os << ss.rdbuf();
3769     return ok;
3770 }
3771 
EnsureMetadataMounted()3772 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3773     if (!device_->IsRecovery()) {
3774         // No need to mount anything in recovery.
3775         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3776         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3777     }
3778     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3779     if (ret == nullptr) return nullptr;
3780 
3781     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3782     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3783     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3784     // treat this case as if /metadata is not mounted.
3785     if (!LockShared()) {
3786         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3787                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3788         return nullptr;
3789     }
3790     return ret;
3791 }
3792 
HandleImminentDataWipe(const std::function<void ()> & callback)3793 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3794     if (!device_->IsRecovery()) {
3795         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3796         return false;
3797     }
3798 
3799     auto mount = EnsureMetadataMounted();
3800     if (!mount || !mount->HasDevice()) {
3801         // We allow the wipe to continue, because if we can't mount /metadata,
3802         // it is unlikely the device would have booted anyway. If there is no
3803         // metadata partition, then the device predates Virtual A/B.
3804         return true;
3805     }
3806 
3807     // Check this early, so we don't accidentally start trying to populate
3808     // the state file in recovery. Note we don't call GetUpdateState since
3809     // we want errors in acquiring the lock to be propagated, instead of
3810     // returning UpdateState::None.
3811     auto state_file = GetStateFilePath();
3812     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3813         return true;
3814     }
3815 
3816     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3817     auto super_path = device_->GetSuperDevice(slot_number);
3818     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3819         LOG(ERROR) << "Unable to map partitions to complete merge.";
3820         return false;
3821     }
3822 
3823     auto process_callback = [&]() -> bool {
3824         if (callback) {
3825             callback();
3826         }
3827         return true;
3828     };
3829 
3830     in_factory_data_reset_ = true;
3831     UpdateState state =
3832             ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3833     in_factory_data_reset_ = false;
3834 
3835     if (state == UpdateState::MergeFailed) {
3836         return false;
3837     }
3838 
3839     // Nothing should be depending on partitions now, so unmap them all.
3840     if (!UnmapAllPartitionsInRecovery()) {
3841         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3842     }
3843 
3844     if (state != UpdateState::None) {
3845         auto lock = LockExclusive();
3846         if (!lock) return false;
3847 
3848         // Zap the update state so the bootloader doesn't think we're still
3849         // merging. It's okay if this fails, it's informative only at this
3850         // point.
3851         WriteUpdateState(lock.get(), UpdateState::None);
3852     }
3853     return true;
3854 }
3855 
FinishMergeInRecovery()3856 bool SnapshotManager::FinishMergeInRecovery() {
3857     if (!device_->IsRecovery()) {
3858         LOG(ERROR) << "Data wipes are only allowed in recovery.";
3859         return false;
3860     }
3861 
3862     auto mount = EnsureMetadataMounted();
3863     if (!mount || !mount->HasDevice()) {
3864         return false;
3865     }
3866 
3867     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3868     auto super_path = device_->GetSuperDevice(slot_number);
3869     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3870         LOG(ERROR) << "Unable to map partitions to complete merge.";
3871         return false;
3872     }
3873 
3874     UpdateState state = ProcessUpdateState();
3875     if (state != UpdateState::MergeCompleted) {
3876         LOG(ERROR) << "Merge returned unexpected status: " << state;
3877         return false;
3878     }
3879 
3880     // Nothing should be depending on partitions now, so unmap them all.
3881     if (!UnmapAllPartitionsInRecovery()) {
3882         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3883     }
3884     return true;
3885 }
3886 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3887 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3888                                                           const std::function<bool()>& callback) {
3889     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3890     UpdateState state = ProcessUpdateState(callback);
3891     LOG(INFO) << "Update state in recovery: " << state;
3892     switch (state) {
3893         case UpdateState::MergeFailed:
3894             LOG(ERROR) << "Unrecoverable merge failure detected.";
3895             return state;
3896         case UpdateState::Unverified: {
3897             // If an OTA was just applied but has not yet started merging:
3898             //
3899             // - if forward merge is allowed, initiate merge and call
3900             // ProcessUpdateState again.
3901             //
3902             // - if forward merge is not allowed, we
3903             // have no choice but to revert slots, because the current slot will
3904             // immediately become unbootable. Rather than wait for the device
3905             // to reboot N times until a rollback, we proactively disable the
3906             // new slot instead.
3907             //
3908             // Since the rollback is inevitable, we don't treat a HAL failure
3909             // as an error here.
3910             auto slot = GetCurrentSlot();
3911             if (slot == Slot::Target) {
3912                 if (allow_forward_merge &&
3913                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3914                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
3915 
3916                     if (!InitiateMerge()) {
3917                         LOG(ERROR) << "Failed to initiate merge on data wipe.";
3918                         return UpdateState::MergeFailed;
3919                     }
3920                     return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3921                 }
3922 
3923                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3924                 device_->SetSlotAsUnbootable(slot_number);
3925             } else {
3926                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3927             }
3928             break;
3929         }
3930         case UpdateState::MergeNeedsReboot:
3931             // We shouldn't get here, because nothing is depending on
3932             // logical partitions.
3933             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3934             break;
3935         default:
3936             break;
3937     }
3938     return state;
3939 }
3940 
EnsureNoOverflowSnapshot(LockedFile * lock)3941 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3942     CHECK(lock);
3943 
3944     std::vector<std::string> snapshots;
3945     if (!ListSnapshots(lock, &snapshots)) {
3946         LOG(ERROR) << "Could not list snapshots.";
3947         return false;
3948     }
3949 
3950     for (const auto& snapshot : snapshots) {
3951         SnapshotStatus status;
3952         if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3953             return false;
3954         }
3955         if (status.compression_enabled()) {
3956             continue;
3957         }
3958 
3959         std::vector<DeviceMapper::TargetInfo> targets;
3960         if (!dm_.GetTableStatus(snapshot, &targets)) {
3961             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3962             return false;
3963         }
3964         if (targets.size() != 1) {
3965             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3966                        << ", size = " << targets.size();
3967             return false;
3968         }
3969         if (targets[0].IsOverflowSnapshot()) {
3970             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3971                        << ", CoW device size computation is wrong!";
3972             return false;
3973         }
3974     }
3975 
3976     return true;
3977 }
3978 
RecoveryCreateSnapshotDevices()3979 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3980     if (!device_->IsRecovery()) {
3981         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3982         return CreateResult::NOT_CREATED;
3983     }
3984 
3985     auto mount = EnsureMetadataMounted();
3986     if (!mount || !mount->HasDevice()) {
3987         LOG(ERROR) << "Couldn't mount Metadata.";
3988         return CreateResult::NOT_CREATED;
3989     }
3990     return RecoveryCreateSnapshotDevices(mount);
3991 }
3992 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3993 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3994         const std::unique_ptr<AutoDevice>& metadata_device) {
3995     if (!device_->IsRecovery()) {
3996         LOG(ERROR) << __func__ << " is only allowed in recovery.";
3997         return CreateResult::NOT_CREATED;
3998     }
3999 
4000     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
4001         LOG(ERROR) << "Metadata not mounted.";
4002         return CreateResult::NOT_CREATED;
4003     }
4004 
4005     auto state_file = GetStateFilePath();
4006     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
4007         LOG(ERROR) << "Couldn't access state file.";
4008         return CreateResult::NOT_CREATED;
4009     }
4010 
4011     if (!NeedSnapshotsInFirstStageMount()) {
4012         return CreateResult::NOT_CREATED;
4013     }
4014 
4015     auto slot_suffix = device_->GetOtherSlotSuffix();
4016     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
4017     auto super_path = device_->GetSuperDevice(slot_number);
4018     if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4019         LOG(ERROR) << "Unable to map partitions.";
4020         return CreateResult::ERROR;
4021     }
4022     return CreateResult::CREATED;
4023 }
4024 
UpdateForwardMergeIndicator(bool wipe)4025 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
4026     auto path = GetForwardMergeIndicatorPath();
4027 
4028     if (!wipe) {
4029         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
4030         return RemoveFileIfExists(path);
4031     }
4032 
4033     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
4034 
4035     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
4036     if (!android::base::WriteStringToFile("1", path)) {
4037         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
4038         return false;
4039     }
4040 
4041     return true;
4042 }
4043 
GetSnapshotMergeStatsInstance()4044 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
4045     return SnapshotMergeStats::GetInstance(*this);
4046 }
4047 
4048 // This is only to be used in recovery or normal Android (not first-stage init).
4049 // We don't guarantee dm paths are available in first-stage init, because ueventd
4050 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)4051 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
4052                                                std::string* device_path) {
4053     // Try getting the device string if it is a device mapper device.
4054     if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4055         return dm_.GetDmDevicePathByName(device_name, device_path);
4056     }
4057 
4058     // Otherwise, get path from IImageManager.
4059     return images_->GetMappedImageDevice(device_name, device_path);
4060 }
4061 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)4062 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
4063                                                        std::string* device_string_or_mapped_path) {
4064     // Try getting the device string if it is a device mapper device.
4065     if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4066         return dm_.GetDeviceString(device_name, device_string_or_mapped_path);
4067     }
4068 
4069     // Otherwise, get path from IImageManager.
4070     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
4071         return false;
4072     }
4073 
4074     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
4075                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
4076                  << "may not be available in first stage init! ";
4077     return true;
4078 }
4079 
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)4080 bool SnapshotManager::WaitForDevice(const std::string& device,
4081                                     std::chrono::milliseconds timeout_ms) {
4082     if (!android::base::StartsWith(device, "/")) {
4083         return true;
4084     }
4085 
4086     // In first-stage init, we rely on init setting a callback which can
4087     // regenerate uevents and populate /dev for us.
4088     if (uevent_regen_callback_) {
4089         if (!uevent_regen_callback_(device)) {
4090             LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
4091             return false;
4092         }
4093         return true;
4094     }
4095 
4096     // Otherwise, the only kind of device we need to wait for is a dm-user
4097     // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
4098     // the path has been created.
4099     if (!android::base::StartsWith(device, "/dev/dm-user/")) {
4100         return true;
4101     }
4102 
4103     if (timeout_ms.count() == 0) {
4104         LOG(ERROR) << "No timeout was specified to wait for device: " << device;
4105         return false;
4106     }
4107     if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
4108         LOG(ERROR) << "Timed out waiting for device to appear: " << device;
4109         return false;
4110     }
4111     return true;
4112 }
4113 
IsSnapuserdRequired()4114 bool SnapshotManager::IsSnapuserdRequired() {
4115     auto lock = LockExclusive();
4116     if (!lock) return false;
4117 
4118     auto status = ReadSnapshotUpdateStatus(lock.get());
4119     return status.state() != UpdateState::None && status.compression_enabled();
4120 }
4121 
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)4122 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
4123     return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
4124 }
4125 
PerformSecondStageInitTransition()4126 bool SnapshotManager::PerformSecondStageInitTransition() {
4127     return PerformInitTransition(InitTransition::SECOND_STAGE);
4128 }
4129 
ReadOldPartitionMetadata(LockedFile * lock)4130 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
4131     CHECK(lock);
4132 
4133     if (!old_partition_metadata_) {
4134         auto path = GetOldPartitionMetadataPath();
4135         old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
4136         if (!old_partition_metadata_) {
4137             LOG(ERROR) << "Could not read old partition metadata from " << path;
4138             return nullptr;
4139         }
4140     }
4141     return old_partition_metadata_.get();
4142 }
4143 
DecideMergePhase(const SnapshotStatus & status)4144 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
4145     if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
4146         return MergePhase::FIRST_PHASE;
4147     }
4148     return MergePhase::SECOND_PHASE;
4149 }
4150 
UpdateCowStats(ISnapshotMergeStats * stats)4151 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
4152     auto lock = LockExclusive();
4153     if (!lock) return;
4154 
4155     std::vector<std::string> snapshots;
4156     if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
4157         LOG(ERROR) << "Could not list snapshots";
4158         return;
4159     }
4160 
4161     uint64_t cow_file_size = 0;
4162     uint64_t total_cow_size = 0;
4163     uint64_t estimated_cow_size = 0;
4164     for (const auto& snapshot : snapshots) {
4165         SnapshotStatus status;
4166         if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
4167             return;
4168         }
4169 
4170         cow_file_size += status.cow_file_size();
4171         total_cow_size += status.cow_file_size() + status.cow_partition_size();
4172         estimated_cow_size += status.estimated_cow_size();
4173     }
4174 
4175     stats->set_cow_file_size(cow_file_size);
4176     stats->set_total_cow_size_bytes(total_cow_size);
4177     stats->set_estimated_cow_size_bytes(estimated_cow_size);
4178 }
4179 
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)4180 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
4181                                            const std::chrono::milliseconds& timeout_ms) {
4182     auto start = std::chrono::steady_clock::now();
4183     while (true) {
4184         if (dm_.DeleteDeviceIfExists(name)) {
4185             return true;
4186         }
4187         auto now = std::chrono::steady_clock::now();
4188         auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
4189         if (elapsed >= timeout_ms) {
4190             break;
4191         }
4192         std::this_thread::sleep_for(400ms);
4193     }
4194 
4195     // Try to diagnose why this failed. First get the actual device path.
4196     std::string full_path;
4197     if (!dm_.GetDmDevicePathByName(name, &full_path)) {
4198         LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
4199         return false;
4200     }
4201 
4202     // Check for child dm-devices.
4203     std::string block_name = android::base::Basename(full_path);
4204     std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
4205 
4206     std::error_code ec;
4207     std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
4208     if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
4209         LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
4210         return false;
4211     }
4212 
4213     // Check for mounted partitions.
4214     android::fs_mgr::Fstab fstab;
4215     android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
4216     for (const auto& entry : fstab) {
4217         if (android::base::Basename(entry.blk_device) == block_name) {
4218             LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
4219             return false;
4220         }
4221     }
4222 
4223     // Check for detached mounted partitions.
4224     for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
4225         std::string fs_type = android::base::Basename(fs.path().c_str());
4226         if (!(fs_type == "ext4" || fs_type == "f2fs")) {
4227             continue;
4228         }
4229 
4230         std::string path = fs.path().c_str() + "/"s + block_name;
4231         if (access(path.c_str(), F_OK) == 0) {
4232             LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
4233                        << "; possibly open file descriptor or attached loop device.";
4234             return false;
4235         }
4236     }
4237 
4238     LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
4239                << " still in use."
4240                << "  Probably a file descriptor was leaked or held open, or a loop device is"
4241                << " attached.";
4242     return false;
4243 }
4244 
ReadMergeFailureCode()4245 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
4246     auto lock = LockExclusive();
4247     if (!lock) return MergeFailureCode::AcquireLock;
4248 
4249     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4250     if (status.state() != UpdateState::MergeFailed) {
4251         return MergeFailureCode::Ok;
4252     }
4253     return status.merge_failure_code();
4254 }
4255 
ReadSourceBuildFingerprint()4256 std::string SnapshotManager::ReadSourceBuildFingerprint() {
4257     auto lock = LockExclusive();
4258     if (!lock) return {};
4259 
4260     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4261     return status.source_build_fingerprint();
4262 }
4263 
4264 }  // namespace snapshot
4265 }  // namespace android
4266