• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libsnapshot/snapshot.h>
16 
17 #include <dirent.h>
18 #include <math.h>
19 #include <sys/file.h>
20 #include <sys/types.h>
21 #include <sys/unistd.h>
22 
23 #include <optional>
24 #include <thread>
25 #include <unordered_set>
26 
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/parseint.h>
30 #include <android-base/strings.h>
31 #include <android-base/unique_fd.h>
32 #include <ext4_utils/ext4_utils.h>
33 #include <fs_mgr.h>
34 #include <fs_mgr_dm_linear.h>
35 #include <fstab/fstab.h>
36 #include <libdm/dm.h>
37 #include <libfiemap/image_manager.h>
38 #include <liblp/liblp.h>
39 
40 #include <android/snapshot/snapshot.pb.h>
41 #include <libsnapshot/snapshot_stats.h>
42 #include "device_info.h"
43 #include "partition_cow_creator.h"
44 #include "snapshot_metadata_updater.h"
45 #include "utility.h"
46 
47 namespace android {
48 namespace snapshot {
49 
50 using android::base::unique_fd;
51 using android::dm::DeviceMapper;
52 using android::dm::DmDeviceState;
53 using android::dm::DmTable;
54 using android::dm::DmTargetLinear;
55 using android::dm::DmTargetSnapshot;
56 using android::dm::kSectorSize;
57 using android::dm::SnapshotStorageMode;
58 using android::fiemap::FiemapStatus;
59 using android::fiemap::IImageManager;
60 using android::fs_mgr::CreateDmTable;
61 using android::fs_mgr::CreateLogicalPartition;
62 using android::fs_mgr::CreateLogicalPartitionParams;
63 using android::fs_mgr::GetPartitionGroupName;
64 using android::fs_mgr::GetPartitionName;
65 using android::fs_mgr::LpMetadata;
66 using android::fs_mgr::MetadataBuilder;
67 using android::fs_mgr::SlotNumberForSlotSuffix;
68 using android::hardware::boot::V1_1::MergeStatus;
69 using chromeos_update_engine::DeltaArchiveManifest;
70 using chromeos_update_engine::Extent;
71 using chromeos_update_engine::InstallOperation;
72 template <typename T>
73 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
74 using std::chrono::duration_cast;
75 using namespace std::chrono_literals;
76 using namespace std::string_literals;
77 
78 static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
79 static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
80 static constexpr auto kUpdateStateCheckInterval = 2s;
81 
82 // Note: IImageManager is an incomplete type in the header, so the default
83 // destructor doesn't work.
~SnapshotManager()84 SnapshotManager::~SnapshotManager() {}
85 
New(IDeviceInfo * info)86 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
87     if (!info) {
88         info = new DeviceInfo();
89     }
90     return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
91 }
92 
NewForFirstStageMount(IDeviceInfo * info)93 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
94     auto sm = New(info);
95     if (!sm || !sm->ForceLocalImageManager()) {
96         return nullptr;
97     }
98     return sm;
99 }
100 
SnapshotManager(IDeviceInfo * device)101 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
102     gsid_dir_ = device_->GetGsidDir();
103     metadata_dir_ = device_->GetMetadataDir();
104 }
105 
// Builds the COW name for a snapshot by appending "-cow" to |snapshot_name|.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
109 
// Builds the COW image device name for a snapshot by appending "-cow-img" to
// |snapshot_name|.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
113 
// Builds the base device name for a partition by appending "-base" to
// |partition_name|.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
117 
// Builds the name of the stacked inner snapshot device by appending "-inner"
// to |snapshot_name|.
static std::string GetSnapshotExtraDeviceName(const std::string& snapshot_name) {
    std::string inner_name(snapshot_name);
    inner_name.append("-inner");
    return inner_name;
}
121 
BeginUpdate()122 bool SnapshotManager::BeginUpdate() {
123     bool needs_merge = false;
124     if (!TryCancelUpdate(&needs_merge)) {
125         return false;
126     }
127     if (needs_merge) {
128         LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
129         auto state = ProcessUpdateState();
130         LOG(INFO) << "Merged with state = " << state;
131     }
132 
133     auto file = LockExclusive();
134     if (!file) return false;
135 
136     // Purge the ImageManager just in case there is a corrupt lp_metadata file
137     // lying around. (NB: no need to return false on an error, we can let the
138     // update try to progress.)
139     if (EnsureImageManager()) {
140         images_->RemoveAllImages();
141     }
142 
143     auto state = ReadUpdateState(file.get());
144     if (state != UpdateState::None) {
145         LOG(ERROR) << "An update is already in progress, cannot begin a new update";
146         return false;
147     }
148     return WriteUpdateState(file.get(), UpdateState::Initiated);
149 }
150 
CancelUpdate()151 bool SnapshotManager::CancelUpdate() {
152     bool needs_merge = false;
153     if (!TryCancelUpdate(&needs_merge)) {
154         return false;
155     }
156     if (needs_merge) {
157         LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
158     }
159     return !needs_merge;
160 }
161 
TryCancelUpdate(bool * needs_merge)162 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
163     *needs_merge = false;
164 
165     auto file = LockExclusive();
166     if (!file) return false;
167 
168     UpdateState state = ReadUpdateState(file.get());
169     if (state == UpdateState::None) return true;
170 
171     if (state == UpdateState::Initiated) {
172         LOG(INFO) << "Update has been initiated, now canceling";
173         return RemoveAllUpdateState(file.get());
174     }
175 
176     if (state == UpdateState::Unverified) {
177         // We completed an update, but it can still be canceled if we haven't booted into it.
178         auto slot = GetCurrentSlot();
179         if (slot != Slot::Target) {
180             LOG(INFO) << "Canceling previously completed updates (if any)";
181             return RemoveAllUpdateState(file.get());
182         }
183     }
184     *needs_merge = true;
185     return true;
186 }
187 
ReadUpdateSourceSlotSuffix()188 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
189     auto boot_file = GetSnapshotBootIndicatorPath();
190     std::string contents;
191     if (!android::base::ReadFileToString(boot_file, &contents)) {
192         PLOG(WARNING) << "Cannot read " << boot_file;
193         return {};
194     }
195     return contents;
196 }
197 
GetCurrentSlot()198 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
199     auto contents = ReadUpdateSourceSlotSuffix();
200     if (contents.empty()) {
201         return Slot::Unknown;
202     }
203     if (device_->GetSlotSuffix() == contents) {
204         return Slot::Source;
205     }
206     return Slot::Target;
207 }
208 
RemoveFileIfExists(const std::string & path)209 static bool RemoveFileIfExists(const std::string& path) {
210     std::string message;
211     if (!android::base::RemoveFileIfExists(path, &message)) {
212         LOG(ERROR) << "Remove failed: " << path << ": " << message;
213         return false;
214     }
215     return true;
216 }
217 
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)218 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
219     if (prolog && !prolog()) {
220         LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
221         return false;
222     }
223 
224     LOG(INFO) << "Removing all update state.";
225 
226     if (!RemoveAllSnapshots(lock)) {
227         LOG(ERROR) << "Could not remove all snapshots";
228         return false;
229     }
230 
231     // It's okay if these fail:
232     // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
233     // reading the indicator file, so it's not a problem if it still exists
234     // after the update completes.
235     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
236     // matches the incoming update.
237     std::vector<std::string> files = {
238             GetSnapshotBootIndicatorPath(),
239             GetRollbackIndicatorPath(),
240             GetForwardMergeIndicatorPath(),
241     };
242     for (const auto& file : files) {
243         RemoveFileIfExists(file);
244     }
245 
246     // If this fails, we'll keep trying to remove the update state (as the
247     // device reboots or starts a new update) until it finally succeeds.
248     return WriteUpdateState(lock, UpdateState::None);
249 }
250 
FinishedSnapshotWrites(bool wipe)251 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
252     auto lock = LockExclusive();
253     if (!lock) return false;
254 
255     auto update_state = ReadUpdateState(lock.get());
256     if (update_state == UpdateState::Unverified) {
257         LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
258         return true;
259     }
260 
261     if (update_state != UpdateState::Initiated) {
262         LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
263         return false;
264     }
265 
266     if (!EnsureNoOverflowSnapshot(lock.get())) {
267         LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
268         return false;
269     }
270 
271     if (!UpdateForwardMergeIndicator(wipe)) {
272         return false;
273     }
274 
275     // This file is written on boot to detect whether a rollback occurred. It
276     // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
277     // snapshots too early.
278     if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
279         return false;
280     }
281 
282     // This file acts as both a quick indicator for init (it can use access(2)
283     // to decide how to do first-stage mounts), and it stores the old slot, so
284     // we can tell whether or not we performed a rollback.
285     auto contents = device_->GetSlotSuffix();
286     auto boot_file = GetSnapshotBootIndicatorPath();
287     if (!WriteStringToFileAtomic(contents, boot_file)) {
288         PLOG(ERROR) << "write failed: " << boot_file;
289         return false;
290     }
291     return WriteUpdateState(lock.get(), UpdateState::Unverified);
292 }
293 
CreateSnapshot(LockedFile * lock,SnapshotStatus * status)294 bool SnapshotManager::CreateSnapshot(LockedFile* lock, SnapshotStatus* status) {
295     CHECK(lock);
296     CHECK(lock->lock_mode() == LOCK_EX);
297     CHECK(status);
298 
299     if (status->name().empty()) {
300         LOG(ERROR) << "SnapshotStatus has no name.";
301         return false;
302     }
303     // Sanity check these sizes. Like liblp, we guarantee the partition size
304     // is respected, which means it has to be sector-aligned. (This guarantee
305     // is useful for locating avb footers correctly). The COW file size, however,
306     // can be arbitrarily larger than specified, so we can safely round it up.
307     if (status->device_size() % kSectorSize != 0) {
308         LOG(ERROR) << "Snapshot " << status->name()
309                    << " device size is not a multiple of the sector size: "
310                    << status->device_size();
311         return false;
312     }
313     if (status->snapshot_size() % kSectorSize != 0) {
314         LOG(ERROR) << "Snapshot " << status->name()
315                    << " snapshot size is not a multiple of the sector size: "
316                    << status->snapshot_size();
317         return false;
318     }
319     if (status->cow_partition_size() % kSectorSize != 0) {
320         LOG(ERROR) << "Snapshot " << status->name()
321                    << " cow partition size is not a multiple of the sector size: "
322                    << status->cow_partition_size();
323         return false;
324     }
325     if (status->cow_file_size() % kSectorSize != 0) {
326         LOG(ERROR) << "Snapshot " << status->name()
327                    << " cow file size is not a multiple of the sector size: "
328                    << status->cow_file_size();
329         return false;
330     }
331 
332     status->set_state(SnapshotState::CREATED);
333     status->set_sectors_allocated(0);
334     status->set_metadata_sectors(0);
335 
336     if (!WriteSnapshotStatus(lock, *status)) {
337         PLOG(ERROR) << "Could not write snapshot status: " << status->name();
338         return false;
339     }
340     return true;
341 }
342 
CreateCowImage(LockedFile * lock,const std::string & name)343 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
344     CHECK(lock);
345     CHECK(lock->lock_mode() == LOCK_EX);
346     if (!EnsureImageManager()) return Return::Error();
347 
348     SnapshotStatus status;
349     if (!ReadSnapshotStatus(lock, name, &status)) {
350         return Return::Error();
351     }
352 
353     // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
354     // Sanity check this.
355     if (status.cow_file_size() % kSectorSize != 0) {
356         LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
357                    << status.cow_file_size();
358         return Return::Error();
359     }
360 
361     std::string cow_image_name = GetCowImageDeviceName(name);
362     int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
363     return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
364 }
365 
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)366 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
367                                   const std::string& base_device, const std::string& cow_device,
368                                   const std::chrono::milliseconds& timeout_ms,
369                                   std::string* dev_path) {
370     CHECK(lock);
371 
372     SnapshotStatus status;
373     if (!ReadSnapshotStatus(lock, name, &status)) {
374         return false;
375     }
376     if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
377         LOG(ERROR) << "Should not create a snapshot device for " << name
378                    << " after merging has completed.";
379         return false;
380     }
381 
382     // Validate the block device size, as well as the requested snapshot size.
383     // Note that during first-stage init, we don't have the device paths.
384     if (android::base::StartsWith(base_device, "/")) {
385         unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
386         if (fd < 0) {
387             PLOG(ERROR) << "open failed: " << base_device;
388             return false;
389         }
390         auto dev_size = get_block_device_size(fd);
391         if (!dev_size) {
392             PLOG(ERROR) << "Could not determine block device size: " << base_device;
393             return false;
394         }
395         if (status.device_size() != dev_size) {
396             LOG(ERROR) << "Block device size for " << base_device << " does not match"
397                        << "(expected " << status.device_size() << ", got " << dev_size << ")";
398             return false;
399         }
400     }
401     if (status.device_size() % kSectorSize != 0) {
402         LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
403         return false;
404     }
405     if (status.snapshot_size() % kSectorSize != 0 ||
406         status.snapshot_size() > status.device_size()) {
407         LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
408         return false;
409     }
410     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
411     uint64_t linear_sectors = (status.device_size() - status.snapshot_size()) / kSectorSize;
412 
413     auto& dm = DeviceMapper::Instance();
414 
415     // Note that merging is a global state. We do track whether individual devices
416     // have completed merging, but the start of the merge process is considered
417     // atomic.
418     SnapshotStorageMode mode;
419     switch (ReadUpdateState(lock)) {
420         case UpdateState::MergeCompleted:
421         case UpdateState::MergeNeedsReboot:
422             LOG(ERROR) << "Should not create a snapshot device for " << name
423                        << " after global merging has completed.";
424             return false;
425         case UpdateState::Merging:
426         case UpdateState::MergeFailed:
427             // Note: MergeFailed indicates that a merge is in progress, but
428             // is possibly stalled. We still have to honor the merge.
429             mode = SnapshotStorageMode::Merge;
430             break;
431         default:
432             mode = SnapshotStorageMode::Persistent;
433             break;
434     }
435 
436     // The kernel (tested on 4.19) crashes horribly if a device has both a snapshot
437     // and a linear target in the same table. Instead, we stack them, and give the
438     // snapshot device a different name. It is not exposed to the caller in this
439     // case.
440     auto snap_name = (linear_sectors > 0) ? GetSnapshotExtraDeviceName(name) : name;
441 
442     DmTable table;
443     table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
444                                     kSnapshotChunkSize);
445     if (!dm.CreateDevice(snap_name, table, dev_path, timeout_ms)) {
446         LOG(ERROR) << "Could not create snapshot device: " << snap_name;
447         return false;
448     }
449 
450     if (linear_sectors) {
451         std::string snap_dev;
452         if (!dm.GetDeviceString(snap_name, &snap_dev)) {
453             LOG(ERROR) << "Cannot determine major/minor for: " << snap_name;
454             return false;
455         }
456 
457         // Our stacking will looks like this:
458         //     [linear, linear] ; to snapshot, and non-snapshot region of base device
459         //     [snapshot-inner]
460         //     [base device]   [cow]
461         DmTable table;
462         table.Emplace<DmTargetLinear>(0, snapshot_sectors, snap_dev, 0);
463         table.Emplace<DmTargetLinear>(snapshot_sectors, linear_sectors, base_device,
464                                       snapshot_sectors);
465         if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
466             LOG(ERROR) << "Could not create outer snapshot device: " << name;
467             dm.DeleteDevice(snap_name);
468             return false;
469         }
470     }
471 
472     // :TODO: when merging is implemented, we need to add an argument to the
473     // status indicating how much progress is left to merge. (device-mapper
474     // does not retain the initial values, so we can't derive them.)
475     return true;
476 }
477 
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)478 std::optional<std::string> SnapshotManager::MapCowImage(
479         const std::string& name, const std::chrono::milliseconds& timeout_ms) {
480     if (!EnsureImageManager()) return std::nullopt;
481     auto cow_image_name = GetCowImageDeviceName(name);
482 
483     bool ok;
484     std::string cow_dev;
485     if (has_local_image_manager_) {
486         // If we forced a local image manager, it means we don't have binder,
487         // which means first-stage init. We must use device-mapper.
488         const auto& opener = device_->GetPartitionOpener();
489         ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
490     } else {
491         ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
492     }
493 
494     if (ok) {
495         LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
496         return cow_dev;
497     }
498     LOG(ERROR) << "Could not map image device: " << cow_image_name;
499     return std::nullopt;
500 }
501 
UnmapSnapshot(LockedFile * lock,const std::string & name)502 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
503     CHECK(lock);
504 
505     auto& dm = DeviceMapper::Instance();
506     if (!dm.DeleteDeviceIfExists(name)) {
507         LOG(ERROR) << "Could not delete snapshot device: " << name;
508         return false;
509     }
510 
511     auto snapshot_extra_device = GetSnapshotExtraDeviceName(name);
512     if (!dm.DeleteDeviceIfExists(snapshot_extra_device)) {
513         LOG(ERROR) << "Could not delete snapshot inner device: " << snapshot_extra_device;
514         return false;
515     }
516 
517     return true;
518 }
519 
UnmapCowImage(const std::string & name)520 bool SnapshotManager::UnmapCowImage(const std::string& name) {
521     if (!EnsureImageManager()) return false;
522     return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
523 }
524 
DeleteSnapshot(LockedFile * lock,const std::string & name)525 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
526     CHECK(lock);
527     CHECK(lock->lock_mode() == LOCK_EX);
528     if (!EnsureImageManager()) return false;
529 
530     if (!UnmapCowDevices(lock, name)) {
531         return false;
532     }
533 
534     // We can't delete snapshots in recovery. The only way we'd try is it we're
535     // completing or canceling a merge in preparation for a data wipe, in which
536     // case, we don't care if the file sticks around.
537     if (device_->IsRecovery()) {
538         LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
539         return true;
540     }
541 
542     auto cow_image_name = GetCowImageDeviceName(name);
543     if (images_->BackingImageExists(cow_image_name)) {
544         if (!images_->DeleteBackingImage(cow_image_name)) {
545             return false;
546         }
547     }
548 
549     std::string error;
550     auto file_path = GetSnapshotStatusFilePath(name);
551     if (!android::base::RemoveFileIfExists(file_path, &error)) {
552         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
553         return false;
554     }
555     return true;
556 }
557 
InitiateMerge(uint64_t * cow_file_size)558 bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
559     auto lock = LockExclusive();
560     if (!lock) return false;
561 
562     UpdateState state = ReadUpdateState(lock.get());
563     if (state != UpdateState::Unverified) {
564         LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
565         return false;
566     }
567 
568     auto slot = GetCurrentSlot();
569     if (slot != Slot::Target) {
570         LOG(ERROR) << "Device cannot merge while not booting from new slot";
571         return false;
572     }
573 
574     std::vector<std::string> snapshots;
575     if (!ListSnapshots(lock.get(), &snapshots)) {
576         LOG(ERROR) << "Could not list snapshots";
577         return false;
578     }
579 
580     auto other_suffix = device_->GetOtherSlotSuffix();
581 
582     auto& dm = DeviceMapper::Instance();
583     for (const auto& snapshot : snapshots) {
584         if (android::base::EndsWith(snapshot, other_suffix)) {
585             // Allow the merge to continue, but log this unexpected case.
586             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
587             continue;
588         }
589 
590         // The device has to be mapped, since everything should be merged at
591         // the same time. This is a fairly serious error. We could forcefully
592         // map everything here, but it should have been mapped during first-
593         // stage init.
594         if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
595             LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
596             return false;
597         }
598     }
599 
600     auto metadata = ReadCurrentMetadata();
601     for (auto it = snapshots.begin(); it != snapshots.end();) {
602         switch (GetMetadataPartitionState(*metadata, *it)) {
603             case MetadataPartitionState::Flashed:
604                 LOG(WARNING) << "Detected re-flashing for partition " << *it
605                              << ". Skip merging it.";
606                 [[fallthrough]];
607             case MetadataPartitionState::None: {
608                 LOG(WARNING) << "Deleting snapshot for partition " << *it;
609                 if (!DeleteSnapshot(lock.get(), *it)) {
610                     LOG(WARNING) << "Cannot delete snapshot for partition " << *it
611                                  << ". Skip merging it anyways.";
612                 }
613                 it = snapshots.erase(it);
614             } break;
615             case MetadataPartitionState::Updated: {
616                 ++it;
617             } break;
618         }
619     }
620 
621     uint64_t total_cow_file_size = 0;
622     DmTargetSnapshot::Status initial_target_values = {};
623     for (const auto& snapshot : snapshots) {
624         DmTargetSnapshot::Status current_status;
625         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) {
626             return false;
627         }
628         initial_target_values.sectors_allocated += current_status.sectors_allocated;
629         initial_target_values.total_sectors += current_status.total_sectors;
630         initial_target_values.metadata_sectors += current_status.metadata_sectors;
631 
632         SnapshotStatus snapshot_status;
633         if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
634             return false;
635         }
636         total_cow_file_size += snapshot_status.cow_file_size();
637     }
638 
639     if (cow_file_size) {
640         *cow_file_size = total_cow_file_size;
641     }
642 
643     SnapshotUpdateStatus initial_status;
644     initial_status.set_state(UpdateState::Merging);
645     initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
646     initial_status.set_total_sectors(initial_target_values.total_sectors);
647     initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
648 
649     // Point of no return - mark that we're starting a merge. From now on every
650     // snapshot must be a merge target.
651     if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
652         return false;
653     }
654 
655     bool rewrote_all = true;
656     for (const auto& snapshot : snapshots) {
657         // If this fails, we have no choice but to continue. Everything must
658         // be merged. This is not an ideal state to be in, but it is safe,
659         // because we the next boot will try again.
660         if (!SwitchSnapshotToMerge(lock.get(), snapshot)) {
661             LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
662             rewrote_all = false;
663         }
664     }
665 
666     // If we couldn't switch everything to a merge target, pre-emptively mark
667     // this merge as failed. It will get acknowledged when WaitForMerge() is
668     // called.
669     if (!rewrote_all) {
670         WriteUpdateState(lock.get(), UpdateState::MergeFailed);
671     }
672 
673     // Return true no matter what, because a merge was initiated.
674     return true;
675 }
676 
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)677 bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
678     SnapshotStatus status;
679     if (!ReadSnapshotStatus(lock, name, &status)) {
680         return false;
681     }
682     if (status.state() != SnapshotState::CREATED) {
683         LOG(WARNING) << "Snapshot " << name
684                      << " has unexpected state: " << SnapshotState_Name(status.state());
685     }
686 
687     // After this, we return true because we technically did switch to a merge
688     // target. Everything else we do here is just informational.
689     auto dm_name = GetSnapshotDeviceName(name, status);
690     if (!RewriteSnapshotDeviceTable(dm_name)) {
691         return false;
692     }
693 
694     status.set_state(SnapshotState::MERGING);
695 
696     DmTargetSnapshot::Status dm_status;
697     if (!QuerySnapshotStatus(dm_name, nullptr, &dm_status)) {
698         LOG(ERROR) << "Could not query merge status for snapshot: " << dm_name;
699     }
700     status.set_sectors_allocated(dm_status.sectors_allocated);
701     status.set_metadata_sectors(dm_status.metadata_sectors);
702     if (!WriteSnapshotStatus(lock, status)) {
703         LOG(ERROR) << "Could not update status file for snapshot: " << name;
704     }
705     return true;
706 }
707 
RewriteSnapshotDeviceTable(const std::string & dm_name)708 bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& dm_name) {
709     auto& dm = DeviceMapper::Instance();
710 
711     std::vector<DeviceMapper::TargetInfo> old_targets;
712     if (!dm.GetTableInfo(dm_name, &old_targets)) {
713         LOG(ERROR) << "Could not read snapshot device table: " << dm_name;
714         return false;
715     }
716     if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
717         LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << dm_name;
718         return false;
719     }
720 
721     std::string base_device, cow_device;
722     if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
723         LOG(ERROR) << "Could not derive underlying devices for snapshot: " << dm_name;
724         return false;
725     }
726 
727     DmTable table;
728     table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
729                                     SnapshotStorageMode::Merge, kSnapshotChunkSize);
730     if (!dm.LoadTableAndActivate(dm_name, table)) {
731         LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << dm_name;
732         return false;
733     }
734     LOG(INFO) << "Successfully switched snapshot device to a merge target: " << dm_name;
735     return true;
736 }
737 
// Selects which device-mapper query GetSingleTarget() issues for a device.
enum class TableQuery {
    // Query the device with DeviceMapper::GetTableInfo().
    Table,
    // Query the device with DeviceMapper::GetTableStatus().
    Status,
};
742 
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)743 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
744                             DeviceMapper::TargetInfo* target) {
745     auto& dm = DeviceMapper::Instance();
746     if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
747         return false;
748     }
749 
750     std::vector<DeviceMapper::TargetInfo> targets;
751     bool result;
752     if (query == TableQuery::Status) {
753         result = dm.GetTableStatus(dm_name, &targets);
754     } else {
755         result = dm.GetTableInfo(dm_name, &targets);
756     }
757     if (!result) {
758         LOG(ERROR) << "Could not query device: " << dm_name;
759         return false;
760     }
761     if (targets.size() != 1) {
762         return false;
763     }
764 
765     *target = std::move(targets[0]);
766     return true;
767 }
768 
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)769 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
770     DeviceMapper::TargetInfo snap_target;
771     if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
772         return false;
773     }
774     auto type = DeviceMapper::GetTargetType(snap_target.spec);
775     if (type != "snapshot" && type != "snapshot-merge") {
776         return false;
777     }
778     if (target) {
779         *target = std::move(snap_target);
780     }
781     return true;
782 }
783 
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)784 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
785                                           DmTargetSnapshot::Status* status) {
786     DeviceMapper::TargetInfo target;
787     if (!IsSnapshotDevice(dm_name, &target)) {
788         LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
789         return false;
790     }
791     if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
792         LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
793         return false;
794     }
795     if (target_type) {
796         *target_type = DeviceMapper::GetTargetType(target.spec);
797     }
798     return true;
799 }
800 
801 // Note that when a merge fails, we will *always* try again to complete the
802 // merge each time the device boots. There is no harm in doing so, and if
803 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)804 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
805                                                 const std::function<bool()>& before_cancel) {
806     while (true) {
807         UpdateState state = CheckMergeState(before_cancel);
808         if (state == UpdateState::MergeFailed) {
809             AcknowledgeMergeFailure();
810         }
811         if (state != UpdateState::Merging) {
812             // Either there is no merge, or the merge was finished, so no need
813             // to keep waiting.
814             return state;
815         }
816 
817         if (callback && !callback()) {
818             return state;
819         }
820 
821         // This wait is not super time sensitive, so we have a relatively
822         // low polling frequency.
823         std::this_thread::sleep_for(kUpdateStateCheckInterval);
824     }
825 }
826 
CheckMergeState(const std::function<bool ()> & before_cancel)827 UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) {
828     auto lock = LockExclusive();
829     if (!lock) {
830         return UpdateState::MergeFailed;
831     }
832 
833     UpdateState state = CheckMergeState(lock.get(), before_cancel);
834     if (state == UpdateState::MergeCompleted) {
835         // Do this inside the same lock. Failures get acknowledged without the
836         // lock, because flock() might have failed.
837         AcknowledgeMergeSuccess(lock.get());
838     } else if (state == UpdateState::Cancelled) {
839         if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
840             return ReadSnapshotUpdateStatus(lock.get()).state();
841         }
842     }
843     return state;
844 }
845 
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)846 UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
847                                              const std::function<bool()>& before_cancel) {
848     UpdateState state = ReadUpdateState(lock);
849     switch (state) {
850         case UpdateState::None:
851         case UpdateState::MergeCompleted:
852             // Harmless races are allowed between two callers of WaitForMerge,
853             // so in both of these cases we just propagate the state.
854             return state;
855 
856         case UpdateState::Merging:
857         case UpdateState::MergeNeedsReboot:
858         case UpdateState::MergeFailed:
859             // We'll poll each snapshot below. Note that for the NeedsReboot
860             // case, we always poll once to give cleanup another opportunity to
861             // run.
862             break;
863 
864         case UpdateState::Unverified:
865             // This is an edge case. Normally cancelled updates are detected
866             // via the merge poll below, but if we never started a merge, we
867             // need to also check here.
868             if (HandleCancelledUpdate(lock, before_cancel)) {
869                 return UpdateState::Cancelled;
870             }
871             return state;
872 
873         default:
874             return state;
875     }
876 
877     std::vector<std::string> snapshots;
878     if (!ListSnapshots(lock, &snapshots)) {
879         return UpdateState::MergeFailed;
880     }
881 
882     bool cancelled = false;
883     bool failed = false;
884     bool merging = false;
885     bool needs_reboot = false;
886     for (const auto& snapshot : snapshots) {
887         UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot);
888         switch (snapshot_state) {
889             case UpdateState::MergeFailed:
890                 failed = true;
891                 break;
892             case UpdateState::Merging:
893                 merging = true;
894                 break;
895             case UpdateState::MergeNeedsReboot:
896                 needs_reboot = true;
897                 break;
898             case UpdateState::MergeCompleted:
899                 break;
900             case UpdateState::Cancelled:
901                 cancelled = true;
902                 break;
903             default:
904                 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
905                            << "\"" << snapshot_state << "\"";
906                 failed = true;
907                 break;
908         }
909     }
910 
911     if (merging) {
912         // Note that we handle "Merging" before we handle anything else. We
913         // want to poll until *nothing* is merging if we can, so everything has
914         // a chance to get marked as completed or failed.
915         return UpdateState::Merging;
916     }
917     if (failed) {
918         // Note: since there are many drop-out cases for failure, we acknowledge
919         // it in WaitForMerge rather than here and elsewhere.
920         return UpdateState::MergeFailed;
921     }
922     if (needs_reboot) {
923         WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
924         return UpdateState::MergeNeedsReboot;
925     }
926     if (cancelled) {
927         // This is an edge case, that we handle as correctly as we sensibly can.
928         // The underlying partition has changed behind update_engine, and we've
929         // removed the snapshot as a result. The exact state of the update is
930         // undefined now, but this can only happen on an unlocked device where
931         // partitions can be flashed without wiping userdata.
932         return UpdateState::Cancelled;
933     }
934     return UpdateState::MergeCompleted;
935 }
936 
CheckTargetMergeState(LockedFile * lock,const std::string & name)937 UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) {
938     SnapshotStatus snapshot_status;
939     if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
940         return UpdateState::MergeFailed;
941     }
942 
943     std::string dm_name = GetSnapshotDeviceName(name, snapshot_status);
944 
945     std::unique_ptr<LpMetadata> current_metadata;
946 
947     if (!IsSnapshotDevice(dm_name)) {
948         if (!current_metadata) {
949             current_metadata = ReadCurrentMetadata();
950         }
951 
952         if (!current_metadata ||
953             GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
954             DeleteSnapshot(lock, name);
955             return UpdateState::Cancelled;
956         }
957 
958         // During a check, we decided the merge was complete, but we were unable to
959         // collapse the device-mapper stack and perform COW cleanup. If we haven't
960         // rebooted after this check, the device will still be a snapshot-merge
961         // target. If the have rebooted, the device will now be a linear target,
962         // and we can try cleanup again.
963         if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
964             // NB: It's okay if this fails now, we gave cleanup our best effort.
965             OnSnapshotMergeComplete(lock, name, snapshot_status);
966             return UpdateState::MergeCompleted;
967         }
968 
969         LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << dm_name;
970         return UpdateState::MergeFailed;
971     }
972 
973     // This check is expensive so it is only enabled for debugging.
974     DCHECK((current_metadata = ReadCurrentMetadata()) &&
975            GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
976 
977     std::string target_type;
978     DmTargetSnapshot::Status status;
979     if (!QuerySnapshotStatus(dm_name, &target_type, &status)) {
980         return UpdateState::MergeFailed;
981     }
982     if (target_type != "snapshot-merge") {
983         // We can get here if we failed to rewrite the target type in
984         // InitiateMerge(). If we failed to create the target in first-stage
985         // init, boot would not succeed.
986         LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
987         return UpdateState::MergeFailed;
988     }
989 
990     // These two values are equal when merging is complete.
991     if (status.sectors_allocated != status.metadata_sectors) {
992         if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
993             LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
994             return UpdateState::MergeFailed;
995         }
996         return UpdateState::Merging;
997     }
998 
999     // Merging is done. First, update the status file to indicate the merge
1000     // is complete. We do this before calling OnSnapshotMergeComplete, even
1001     // though this means the write is potentially wasted work (since in the
1002     // ideal case we'll immediately delete the file).
1003     //
1004     // This makes it simpler to reason about the next reboot: no matter what
1005     // part of cleanup failed, first-stage init won't try to create another
1006     // snapshot device for this partition.
1007     snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1008     if (!WriteSnapshotStatus(lock, snapshot_status)) {
1009         return UpdateState::MergeFailed;
1010     }
1011     if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1012         return UpdateState::MergeNeedsReboot;
1013     }
1014     return UpdateState::MergeCompleted;
1015 }
1016 
GetSnapshotBootIndicatorPath()1017 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1018     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1019 }
1020 
GetRollbackIndicatorPath()1021 std::string SnapshotManager::GetRollbackIndicatorPath() {
1022     return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1023 }
1024 
GetForwardMergeIndicatorPath()1025 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1026     return metadata_dir_ + "/allow-forward-merge";
1027 }
1028 
AcknowledgeMergeSuccess(LockedFile * lock)1029 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1030     // It's not possible to remove update state in recovery, so write an
1031     // indicator that cleanup is needed on reboot. If a factory data reset
1032     // was requested, it doesn't matter, everything will get wiped anyway.
1033     // To make testing easier we consider a /data wipe as cleaned up.
1034     if (device_->IsRecovery() && !in_factory_data_reset_) {
1035         WriteUpdateState(lock, UpdateState::MergeCompleted);
1036         return;
1037     }
1038 
1039     RemoveAllUpdateState(lock);
1040 }
1041 
AcknowledgeMergeFailure()1042 void SnapshotManager::AcknowledgeMergeFailure() {
1043     // Log first, so worst case, we always have a record of why the calls below
1044     // were being made.
1045     LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1046 
1047     auto lock = LockExclusive();
1048     if (!lock) return;
1049 
1050     // Since we released the lock in between WaitForMerge and here, it's
1051     // possible (1) the merge successfully completed or (2) was already
1052     // marked as a failure. So make sure to check the state again, and
1053     // only mark as a failure if appropriate.
1054     UpdateState state = ReadUpdateState(lock.get());
1055     if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1056         return;
1057     }
1058 
1059     WriteUpdateState(lock.get(), UpdateState::MergeFailed);
1060 }
1061 
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1062 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1063                                               const SnapshotStatus& status) {
1064     auto dm_name = GetSnapshotDeviceName(name, status);
1065     if (IsSnapshotDevice(dm_name)) {
1066         // We are extra-cautious here, to avoid deleting the wrong table.
1067         std::string target_type;
1068         DmTargetSnapshot::Status dm_status;
1069         if (!QuerySnapshotStatus(dm_name, &target_type, &dm_status)) {
1070             return false;
1071         }
1072         if (target_type != "snapshot-merge") {
1073             LOG(ERROR) << "Unexpected target type " << target_type
1074                        << " for snapshot device: " << dm_name;
1075             return false;
1076         }
1077         if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1078             LOG(ERROR) << "Merge is unexpectedly incomplete for device " << dm_name;
1079             return false;
1080         }
1081         if (!CollapseSnapshotDevice(name, status)) {
1082             LOG(ERROR) << "Unable to collapse snapshot: " << name;
1083             return false;
1084         }
1085         // Note that collapsing is implicitly an Unmap, so we don't need to
1086         // unmap the snapshot.
1087     }
1088 
1089     if (!DeleteSnapshot(lock, name)) {
1090         LOG(ERROR) << "Could not delete snapshot: " << name;
1091         return false;
1092     }
1093     return true;
1094 }
1095 
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1096 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1097                                              const SnapshotStatus& status) {
1098     auto& dm = DeviceMapper::Instance();
1099     auto dm_name = GetSnapshotDeviceName(name, status);
1100 
1101     // Verify we have a snapshot-merge device.
1102     DeviceMapper::TargetInfo target;
1103     if (!GetSingleTarget(dm_name, TableQuery::Table, &target)) {
1104         return false;
1105     }
1106     if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1107         // This should be impossible, it was checked earlier.
1108         LOG(ERROR) << "Snapshot device has invalid target type: " << dm_name;
1109         return false;
1110     }
1111 
1112     std::string base_device, cow_device;
1113     if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1114         LOG(ERROR) << "Could not parse snapshot device " << dm_name
1115                    << " parameters: " << target.data;
1116         return false;
1117     }
1118 
1119     uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1120     if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1121         LOG(ERROR) << "Snapshot " << name
1122                    << " size is not sector aligned: " << status.snapshot_size();
1123         return false;
1124     }
1125 
1126     if (dm_name != name) {
1127         // We've derived the base device, but we actually need to replace the
1128         // table of the outermost device. Do a quick verification that this
1129         // device looks like we expect it to.
1130         std::vector<DeviceMapper::TargetInfo> outer_table;
1131         if (!dm.GetTableInfo(name, &outer_table)) {
1132             LOG(ERROR) << "Could not validate outer snapshot table: " << name;
1133             return false;
1134         }
1135         if (outer_table.size() != 2) {
1136             LOG(ERROR) << "Expected 2 dm-linear targets for table " << name
1137                        << ", got: " << outer_table.size();
1138             return false;
1139         }
1140         for (const auto& target : outer_table) {
1141             auto target_type = DeviceMapper::GetTargetType(target.spec);
1142             if (target_type != "linear") {
1143                 LOG(ERROR) << "Outer snapshot table may only contain linear targets, but " << name
1144                            << " has target: " << target_type;
1145                 return false;
1146             }
1147         }
1148         if (outer_table[0].spec.length != snapshot_sectors) {
1149             LOG(ERROR) << "dm-snapshot " << name << " should have " << snapshot_sectors
1150                        << " sectors, got: " << outer_table[0].spec.length;
1151             return false;
1152         }
1153         uint64_t expected_device_sectors = status.device_size() / kSectorSize;
1154         uint64_t actual_device_sectors = outer_table[0].spec.length + outer_table[1].spec.length;
1155         if (expected_device_sectors != actual_device_sectors) {
1156             LOG(ERROR) << "Outer device " << name << " should have " << expected_device_sectors
1157                        << " sectors, got: " << actual_device_sectors;
1158             return false;
1159         }
1160     }
1161 
1162     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1163     // Create a DmTable that is identical to the base device.
1164     CreateLogicalPartitionParams base_device_params{
1165             .block_device = device_->GetSuperDevice(slot),
1166             .metadata_slot = slot,
1167             .partition_name = name,
1168             .partition_opener = &device_->GetPartitionOpener(),
1169     };
1170     DmTable table;
1171     if (!CreateDmTable(base_device_params, &table)) {
1172         LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1173         return false;
1174     }
1175 
1176     // Note: we are replacing the *outer* table here, so we do not use dm_name.
1177     if (!dm.LoadTableAndActivate(name, table)) {
1178         return false;
1179     }
1180 
1181     // Attempt to delete the snapshot device if one still exists. Nothing
1182     // should be depending on the device, and device-mapper should have
1183     // flushed remaining I/O. We could in theory replace with dm-zero (or
1184     // re-use the table above), but for now it's better to know why this
1185     // would fail.
1186     if (dm_name != name && !dm.DeleteDeviceIfExists(dm_name)) {
1187         LOG(ERROR) << "Unable to delete snapshot device " << dm_name << ", COW cannot be "
1188                    << "reclaimed until after reboot.";
1189         return false;
1190     }
1191 
1192     // Cleanup the base device as well, since it is no longer used. This does
1193     // not block cleanup.
1194     auto base_name = GetBaseDeviceName(name);
1195     if (!dm.DeleteDeviceIfExists(base_name)) {
1196         LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1197     }
1198     return true;
1199 }
1200 
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1201 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1202                                             const std::function<bool()>& before_cancel) {
1203     auto slot = GetCurrentSlot();
1204     if (slot == Slot::Unknown) {
1205         return false;
1206     }
1207 
1208     // If all snapshots were reflashed, then cancel the entire update.
1209     if (AreAllSnapshotsCancelled(lock)) {
1210         LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1211         return RemoveAllUpdateState(lock, before_cancel);
1212     }
1213 
1214     // If update has been rolled back, then cancel the entire update.
1215     // Client (update_engine) is responsible for doing additional cleanup work on its own states
1216     // when ProcessUpdateState() returns UpdateState::Cancelled.
1217     auto current_slot = GetCurrentSlot();
1218     if (current_slot != Slot::Source) {
1219         LOG(INFO) << "Update state is being processed while booting at " << current_slot
1220                   << " slot, taking no action.";
1221         return false;
1222     }
1223 
1224     // current_slot == Source. Attempt to detect rollbacks.
1225     if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1226         // This unverified update is not attempted. Take no action.
1227         PLOG(INFO) << "Rollback indicator not detected. "
1228                    << "Update state is being processed before reboot, taking no action.";
1229         return false;
1230     }
1231 
1232     LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1233     return RemoveAllUpdateState(lock, before_cancel);
1234 }
1235 
ReadCurrentMetadata()1236 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1237     const auto& opener = device_->GetPartitionOpener();
1238     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1239     auto super_device = device_->GetSuperDevice(slot);
1240     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1241     if (!metadata) {
1242         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1243         return nullptr;
1244     }
1245     return metadata;
1246 }
1247 
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1248 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1249         const LpMetadata& metadata, const std::string& name) {
1250     auto partition = android::fs_mgr::FindPartition(metadata, name);
1251     if (!partition) return MetadataPartitionState::None;
1252     if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1253         return MetadataPartitionState::Updated;
1254     }
1255     return MetadataPartitionState::Flashed;
1256 }
1257 
AreAllSnapshotsCancelled(LockedFile * lock)1258 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1259     std::vector<std::string> snapshots;
1260     if (!ListSnapshots(lock, &snapshots)) {
1261         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1262                      << "after applying an update. Assuming no snapshots.";
1263         // Let HandleCancelledUpdate resets UpdateState.
1264         return true;
1265     }
1266 
1267     std::map<std::string, bool> flashing_status;
1268 
1269     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1270         LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1271                      << "removing update states.";
1272         return false;
1273     }
1274 
1275     bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1276                                                [](const auto& pair) { return pair.second; });
1277 
1278     if (all_snapshots_cancelled) {
1279         LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1280     }
1281     return all_snapshots_cancelled;
1282 }
1283 
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1284 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1285                                                 const std::vector<std::string>& snapshots,
1286                                                 std::map<std::string, bool>* out) {
1287     CHECK(lock);
1288 
1289     auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1290     if (source_slot_suffix.empty()) {
1291         return false;
1292     }
1293     uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1294     uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1295 
1296     // Attempt to detect re-flashing on each partition.
1297     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1298     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1299     //   deleted. Caller is responsible for merging the rest of the snapshots.
1300     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1301     //
1302     // Note that we use target slot metadata, since if an OTA has been applied
1303     // to the target slot, we can detect the UPDATED flag. Any kind of flash
1304     // operation against dynamic partitions ensures that all copies of the
1305     // metadata are in sync, so flashing all partitions on the source slot will
1306     // remove the UPDATED flag on the target slot as well.
1307     const auto& opener = device_->GetPartitionOpener();
1308     auto super_device = device_->GetSuperDevice(target_slot);
1309     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1310     if (!metadata) {
1311         return false;
1312     }
1313 
1314     for (const auto& snapshot_name : snapshots) {
1315         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1316             MetadataPartitionState::Updated) {
1317             out->emplace(snapshot_name, false);
1318         } else {
1319             // Delete snapshots for partitions that are re-flashed after the update.
1320             LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1321             out->emplace(snapshot_name, true);
1322         }
1323     }
1324     return true;
1325 }
1326 
RemoveAllSnapshots(LockedFile * lock)1327 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1328     std::vector<std::string> snapshots;
1329     if (!ListSnapshots(lock, &snapshots)) {
1330         LOG(ERROR) << "Could not list snapshots";
1331         return false;
1332     }
1333 
1334     std::map<std::string, bool> flashing_status;
1335     if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1336         LOG(WARNING) << "Failed to get flashing status";
1337     }
1338 
1339     auto current_slot = GetCurrentSlot();
1340     bool ok = true;
1341     bool has_mapped_cow_images = false;
1342     for (const auto& name : snapshots) {
1343         // If booting off source slot, it is okay to unmap and delete all the snapshots.
1344         // If boot indicator is missing, update state is None or Initiated, so
1345         //   it is also okay to unmap and delete all the snapshots.
1346         // If booting off target slot,
1347         //  - should not unmap because:
1348         //    - In Android mode, snapshots are not mapped, but
1349         //      filesystems are mounting off dm-linear targets directly.
1350         //    - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1351         //  - If partition is flashed or unknown, it is okay to delete snapshots.
1352         //    Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1353         //    as dm-snapshot (for example, after merge completes).
1354         bool should_unmap = current_slot != Slot::Target;
1355         bool should_delete = ShouldDeleteSnapshot(lock, flashing_status, current_slot, name);
1356 
1357         bool partition_ok = true;
1358         if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1359             partition_ok = false;
1360         }
1361         if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1362             partition_ok = false;
1363         }
1364 
1365         if (!partition_ok) {
1366             // Remember whether or not we were able to unmap the cow image.
1367             auto cow_image_device = GetCowImageDeviceName(name);
1368             has_mapped_cow_images |=
1369                     (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1370 
1371             ok = false;
1372         }
1373     }
1374 
1375     if (ok || !has_mapped_cow_images) {
1376         // Delete any image artifacts as a precaution, in case an update is
1377         // being cancelled due to some corrupted state in an lp_metadata file.
1378         // Note that we do not do this if some cow images are still mapped,
1379         // since we must not remove backing storage if it's in use.
1380         if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1381             LOG(ERROR) << "Could not remove all snapshot artifacts";
1382             return false;
1383         }
1384     }
1385     return ok;
1386 }
1387 
1388 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(LockedFile * lock,const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1389 bool SnapshotManager::ShouldDeleteSnapshot(LockedFile* lock,
1390                                            const std::map<std::string, bool>& flashing_status,
1391                                            Slot current_slot, const std::string& name) {
1392     if (current_slot != Slot::Target) {
1393         return true;
1394     }
1395     auto it = flashing_status.find(name);
1396     if (it == flashing_status.end()) {
1397         LOG(WARNING) << "Can't determine flashing status for " << name;
1398         return true;
1399     }
1400     if (it->second) {
1401         // partition flashed, okay to delete obsolete snapshots
1402         return true;
1403     }
1404     // partition updated, only delete if not dm-snapshot
1405     SnapshotStatus status;
1406     if (!ReadSnapshotStatus(lock, name, &status)) {
1407         LOG(WARNING) << "Unable to read snapshot status for " << name
1408                      << ", guessing snapshot device name";
1409         auto extra_name = GetSnapshotExtraDeviceName(name);
1410         return !IsSnapshotDevice(name) && !IsSnapshotDevice(extra_name);
1411     }
1412     auto dm_name = GetSnapshotDeviceName(name, status);
1413     return !IsSnapshotDevice(dm_name);
1414 }
1415 
GetUpdateState(double * progress)1416 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1417     // If we've never started an update, the state file won't exist.
1418     auto state_file = GetStateFilePath();
1419     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1420         return UpdateState::None;
1421     }
1422 
1423     auto lock = LockShared();
1424     if (!lock) {
1425         return UpdateState::None;
1426     }
1427 
1428     SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1429     auto state = update_status.state();
1430     if (progress == nullptr) {
1431         return state;
1432     }
1433 
1434     if (state == UpdateState::MergeCompleted) {
1435         *progress = 100.0;
1436         return state;
1437     }
1438 
1439     *progress = 0.0;
1440     if (state != UpdateState::Merging) {
1441         return state;
1442     }
1443 
1444     // Sum all the snapshot states as if the system consists of a single huge
1445     // snapshots device, then compute the merge completion percentage of that
1446     // device.
1447     std::vector<std::string> snapshots;
1448     if (!ListSnapshots(lock.get(), &snapshots)) {
1449         LOG(ERROR) << "Could not list snapshots";
1450         return state;
1451     }
1452 
1453     DmTargetSnapshot::Status fake_snapshots_status = {};
1454     for (const auto& snapshot : snapshots) {
1455         DmTargetSnapshot::Status current_status;
1456 
1457         if (!QuerySnapshotStatus(snapshot, nullptr, &current_status)) continue;
1458 
1459         fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1460         fake_snapshots_status.total_sectors += current_status.total_sectors;
1461         fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1462     }
1463 
1464     *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1465                                                update_status.sectors_allocated());
1466 
1467     return state;
1468 }
1469 
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots)1470 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots) {
1471     CHECK(lock);
1472 
1473     auto dir_path = metadata_dir_ + "/snapshots"s;
1474     std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1475     if (!dir) {
1476         PLOG(ERROR) << "opendir failed: " << dir_path;
1477         return false;
1478     }
1479 
1480     struct dirent* dp;
1481     while ((dp = readdir(dir.get())) != nullptr) {
1482         if (dp->d_type != DT_REG) continue;
1483         snapshots->emplace_back(dp->d_name);
1484     }
1485     return true;
1486 }
1487 
IsSnapshotManagerNeeded()1488 bool SnapshotManager::IsSnapshotManagerNeeded() {
1489     return access(kBootIndicatorPath, F_OK) == 0;
1490 }
1491 
GetGlobalRollbackIndicatorPath()1492 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1493     return kRollbackIndicatorPath;
1494 }
1495 
NeedSnapshotsInFirstStageMount()1496 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1497     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1498     // will create devices that look like the old slot, except with extra
1499     // content at the end of each device. This will confuse dm-verity, and
1500     // ultimately we'll fail to boot. Why not make it a fatal error and have
1501     // the reason be clearer? Because the indicator file still exists, and
1502     // if this was FATAL, reverting to the old slot would be broken.
1503     auto slot = GetCurrentSlot();
1504 
1505     if (slot != Slot::Target) {
1506         if (slot == Slot::Source) {
1507             // Device is rebooting into the original slot, so mark this as a
1508             // rollback.
1509             auto path = GetRollbackIndicatorPath();
1510             if (!android::base::WriteStringToFile("1", path)) {
1511                 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1512             } else {
1513                 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1514             }
1515         }
1516         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1517         return false;
1518     }
1519 
1520     // If we can't read the update state, it's unlikely anything else will
1521     // succeed, so this is a fatal error. We'll eventually exhaust boot
1522     // attempts and revert to the old slot.
1523     auto lock = LockShared();
1524     if (!lock) {
1525         LOG(FATAL) << "Could not read update state to determine snapshot status";
1526         return false;
1527     }
1528     switch (ReadUpdateState(lock.get())) {
1529         case UpdateState::Unverified:
1530         case UpdateState::Merging:
1531         case UpdateState::MergeFailed:
1532             return true;
1533         default:
1534             return false;
1535     }
1536 }
1537 
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1538 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1539         const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1540     LOG(INFO) << "Creating logical partitions with snapshots as needed";
1541 
1542     auto lock = LockExclusive();
1543     if (!lock) return false;
1544 
1545     const auto& opener = device_->GetPartitionOpener();
1546     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1547     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1548     if (!metadata) {
1549         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1550         return false;
1551     }
1552 
1553     for (const auto& partition : metadata->partitions) {
1554         if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1555             LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1556                       << kCowGroupName;
1557             continue;
1558         }
1559 
1560         CreateLogicalPartitionParams params = {
1561                 .block_device = super_device,
1562                 .metadata = metadata.get(),
1563                 .partition = &partition,
1564                 .partition_opener = &opener,
1565                 .timeout_ms = timeout_ms,
1566         };
1567         std::string ignore_path;
1568         if (!MapPartitionWithSnapshot(lock.get(), std::move(params), &ignore_path)) {
1569             return false;
1570         }
1571     }
1572 
1573     LOG(INFO) << "Created logical partitions with snapshot.";
1574     return true;
1575 }
1576 
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1577 static std::chrono::milliseconds GetRemainingTime(
1578         const std::chrono::milliseconds& timeout,
1579         const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1580     // If no timeout is specified, execute all commands without specifying any timeout.
1581     if (timeout.count() == 0) return std::chrono::milliseconds(0);
1582     auto passed_time = std::chrono::steady_clock::now() - begin;
1583     auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1584     if (remaining_time.count() <= 0) {
1585         LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1586                    << remaining_time.count() << "ms remaining)";
1587         // Return min() instead of remaining_time here because 0 is treated as a special value for
1588         // no timeout, where the rest of the commands will still be executed.
1589         return std::chrono::milliseconds::min();
1590     }
1591     return remaining_time;
1592 }
1593 
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,std::string * path)1594 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1595                                                CreateLogicalPartitionParams params,
1596                                                std::string* path) {
1597     auto begin = std::chrono::steady_clock::now();
1598 
1599     CHECK(lock);
1600     path->clear();
1601 
1602     if (params.GetPartitionName() != params.GetDeviceName()) {
1603         LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1604                    << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1605         return false;
1606     }
1607 
1608     // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1609     // reading super partition metadata).
1610     CreateLogicalPartitionParams::OwnedData params_owned_data;
1611     if (!params.InitDefaults(&params_owned_data)) {
1612         return false;
1613     }
1614 
1615     if (!params.partition->num_extents) {
1616         LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1617         return true;  // leave path empty to indicate that nothing is mapped.
1618     }
1619 
1620     // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1621     // partition still has a snapshot that needs to be mapped.  If no live snapshot or merge
1622     // completed, live_snapshot_status is set to nullopt.
1623     std::optional<SnapshotStatus> live_snapshot_status;
1624     do {
1625         if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1626             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1627                       << params.GetPartitionName();
1628             break;
1629         }
1630         auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1631         if (access(file_path.c_str(), F_OK) != 0) {
1632             if (errno != ENOENT) {
1633                 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
1634                            << ": Can't access " << file_path;
1635                 return false;
1636             }
1637             break;
1638         }
1639         live_snapshot_status = std::make_optional<SnapshotStatus>();
1640         if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
1641             return false;
1642         }
1643         // No live snapshot if merge is completed.
1644         if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
1645             live_snapshot_status.reset();
1646         }
1647 
1648         if (live_snapshot_status->state() == SnapshotState::NONE ||
1649             live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
1650                     0) {
1651             LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
1652                          << " is invalid, ignoring: state = "
1653                          << SnapshotState_Name(live_snapshot_status->state())
1654                          << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
1655                          << ", cow_file_size = " << live_snapshot_status->cow_file_size();
1656             live_snapshot_status.reset();
1657         }
1658     } while (0);
1659 
1660     if (live_snapshot_status.has_value()) {
1661         // dm-snapshot requires the base device to be writable.
1662         params.force_writable = true;
1663         // Map the base device with a different name to avoid collision.
1664         params.device_name = GetBaseDeviceName(params.GetPartitionName());
1665     }
1666 
1667     AutoDeviceList created_devices;
1668 
1669     // Create the base device for the snapshot, or if there is no snapshot, the
1670     // device itself. This device consists of the real blocks in the super
1671     // partition that this logical partition occupies.
1672     auto& dm = DeviceMapper::Instance();
1673     std::string base_path;
1674     if (!CreateLogicalPartition(params, &base_path)) {
1675         LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
1676                    << " as device " << params.GetDeviceName();
1677         return false;
1678     }
1679     created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
1680 
1681     if (!live_snapshot_status.has_value()) {
1682         *path = base_path;
1683         created_devices.Release();
1684         return true;
1685     }
1686 
1687     // We don't have ueventd in first-stage init, so use device major:minor
1688     // strings instead.
1689     std::string base_device;
1690     if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
1691         LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
1692         return false;
1693     }
1694 
1695     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1696     if (remaining_time.count() < 0) return false;
1697 
1698     std::string cow_name;
1699     CreateLogicalPartitionParams cow_params = params;
1700     cow_params.timeout_ms = remaining_time;
1701     if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
1702         return false;
1703     }
1704     std::string cow_device;
1705     if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
1706         LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
1707         return false;
1708     }
1709 
1710     remaining_time = GetRemainingTime(params.timeout_ms, begin);
1711     if (remaining_time.count() < 0) return false;
1712 
1713     if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
1714                      path)) {
1715         LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
1716         return false;
1717     }
1718     // No need to add params.GetPartitionName() to created_devices since it is immediately released.
1719 
1720     created_devices.Release();
1721 
1722     LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << *path;
1723 
1724     return true;
1725 }
1726 
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)1727 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
1728                                                  const std::string& target_partition_name) {
1729     CHECK(lock);
1730 
1731     if (!UnmapSnapshot(lock, target_partition_name)) {
1732         return false;
1733     }
1734 
1735     if (!UnmapCowDevices(lock, target_partition_name)) {
1736         return false;
1737     }
1738 
1739     auto& dm = DeviceMapper::Instance();
1740     std::string base_name = GetBaseDeviceName(target_partition_name);
1741     if (!dm.DeleteDeviceIfExists(base_name)) {
1742         LOG(ERROR) << "Cannot delete base device: " << base_name;
1743         return false;
1744     }
1745 
1746     LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
1747 
1748     return true;
1749 }
1750 
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)1751 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
1752                                     const SnapshotStatus& snapshot_status,
1753                                     AutoDeviceList* created_devices, std::string* cow_name) {
1754     CHECK(lock);
1755     CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
1756     auto begin = std::chrono::steady_clock::now();
1757 
1758     std::string partition_name = params.GetPartitionName();
1759     std::string cow_image_name = GetCowImageDeviceName(partition_name);
1760     *cow_name = GetCowName(partition_name);
1761 
1762     auto& dm = DeviceMapper::Instance();
1763 
1764     // Map COW image if necessary.
1765     if (snapshot_status.cow_file_size() > 0) {
1766         if (!EnsureImageManager()) return false;
1767         auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1768         if (remaining_time.count() < 0) return false;
1769 
1770         if (!MapCowImage(partition_name, remaining_time).has_value()) {
1771             LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
1772             return false;
1773         }
1774         created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
1775 
1776         // If no COW partition exists, just return the image alone.
1777         if (snapshot_status.cow_partition_size() == 0) {
1778             *cow_name = std::move(cow_image_name);
1779             LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
1780             return true;
1781         }
1782     }
1783 
1784     auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1785     if (remaining_time.count() < 0) return false;
1786 
1787     CHECK(snapshot_status.cow_partition_size() > 0);
1788 
1789     // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
1790     // COW image device as the last extent.
1791     CreateLogicalPartitionParams cow_partition_params = params;
1792     cow_partition_params.partition = nullptr;
1793     cow_partition_params.partition_name = *cow_name;
1794     cow_partition_params.device_name.clear();
1795     DmTable table;
1796     if (!CreateDmTable(cow_partition_params, &table)) {
1797         return false;
1798     }
1799     // If the COW image exists, append it as the last extent.
1800     if (snapshot_status.cow_file_size() > 0) {
1801         std::string cow_image_device;
1802         if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
1803             LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
1804             return false;
1805         }
1806         auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
1807         auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
1808         table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
1809                                       0);
1810     }
1811 
1812     // We have created the DmTable now. Map it.
1813     std::string cow_path;
1814     if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
1815         LOG(ERROR) << "Could not create COW device: " << *cow_name;
1816         return false;
1817     }
1818     created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
1819     LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
1820     return true;
1821 }
1822 
UnmapCowDevices(LockedFile * lock,const std::string & name)1823 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
1824     CHECK(lock);
1825     if (!EnsureImageManager()) return false;
1826 
1827     auto& dm = DeviceMapper::Instance();
1828     auto cow_name = GetCowName(name);
1829     if (!dm.DeleteDeviceIfExists(cow_name)) {
1830         LOG(ERROR) << "Cannot unmap " << cow_name;
1831         return false;
1832     }
1833 
1834     std::string cow_image_name = GetCowImageDeviceName(name);
1835     if (!images_->UnmapImageIfExists(cow_image_name)) {
1836         LOG(ERROR) << "Cannot unmap image " << cow_image_name;
1837         return false;
1838     }
1839     return true;
1840 }
1841 
OpenFile(const std::string & file,int lock_flags)1842 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
1843         -> std::unique_ptr<LockedFile> {
1844     unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
1845     if (fd < 0) {
1846         PLOG(ERROR) << "Open failed: " << file;
1847         return nullptr;
1848     }
1849     if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
1850         PLOG(ERROR) << "Acquire flock failed: " << file;
1851         return nullptr;
1852     }
1853     // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
1854     // calls, so strip extra flags.
1855     int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
1856     return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
1857 }
1858 
~LockedFile()1859 SnapshotManager::LockedFile::~LockedFile() {
1860     if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
1861         PLOG(ERROR) << "Failed to unlock file: " << path_;
1862     }
1863 }
1864 
GetStateFilePath() const1865 std::string SnapshotManager::GetStateFilePath() const {
1866     return metadata_dir_ + "/state"s;
1867 }
1868 
GetMergeStateFilePath() const1869 std::string SnapshotManager::GetMergeStateFilePath() const {
1870     return metadata_dir_ + "/merge_state"s;
1871 }
1872 
GetLockPath() const1873 std::string SnapshotManager::GetLockPath() const {
1874     return metadata_dir_;
1875 }
1876 
OpenLock(int lock_flags)1877 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
1878     auto lock_file = GetLockPath();
1879     return OpenFile(lock_file, lock_flags);
1880 }
1881 
LockShared()1882 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
1883     return OpenLock(LOCK_SH);
1884 }
1885 
LockExclusive()1886 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
1887     return OpenLock(LOCK_EX);
1888 }
1889 
UpdateStateFromString(const std::string & contents)1890 static UpdateState UpdateStateFromString(const std::string& contents) {
1891     if (contents.empty() || contents == "none") {
1892         return UpdateState::None;
1893     } else if (contents == "initiated") {
1894         return UpdateState::Initiated;
1895     } else if (contents == "unverified") {
1896         return UpdateState::Unverified;
1897     } else if (contents == "merging") {
1898         return UpdateState::Merging;
1899     } else if (contents == "merge-completed") {
1900         return UpdateState::MergeCompleted;
1901     } else if (contents == "merge-needs-reboot") {
1902         return UpdateState::MergeNeedsReboot;
1903     } else if (contents == "merge-failed") {
1904         return UpdateState::MergeFailed;
1905     } else if (contents == "cancelled") {
1906         return UpdateState::Cancelled;
1907     } else {
1908         LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
1909         return UpdateState::None;
1910     }
1911 }
1912 
operator <<(std::ostream & os,UpdateState state)1913 std::ostream& operator<<(std::ostream& os, UpdateState state) {
1914     switch (state) {
1915         case UpdateState::None:
1916             return os << "none";
1917         case UpdateState::Initiated:
1918             return os << "initiated";
1919         case UpdateState::Unverified:
1920             return os << "unverified";
1921         case UpdateState::Merging:
1922             return os << "merging";
1923         case UpdateState::MergeCompleted:
1924             return os << "merge-completed";
1925         case UpdateState::MergeNeedsReboot:
1926             return os << "merge-needs-reboot";
1927         case UpdateState::MergeFailed:
1928             return os << "merge-failed";
1929         case UpdateState::Cancelled:
1930             return os << "cancelled";
1931         default:
1932             LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
1933             return os;
1934     }
1935 }
1936 
ReadUpdateState(LockedFile * lock)1937 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
1938     SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
1939     return status.state();
1940 }
1941 
ReadSnapshotUpdateStatus(LockedFile * lock)1942 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
1943     CHECK(lock);
1944 
1945     SnapshotUpdateStatus status = {};
1946     std::string contents;
1947     if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
1948         PLOG(ERROR) << "Read state file failed";
1949         status.set_state(UpdateState::None);
1950         return status;
1951     }
1952 
1953     if (!status.ParseFromString(contents)) {
1954         LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
1955 
1956         // Try to rollback to legacy file to support devices that are
1957         // currently using the old file format.
1958         // TODO(b/147409432)
1959         status.set_state(UpdateStateFromString(contents));
1960     }
1961 
1962     return status;
1963 }
1964 
WriteUpdateState(LockedFile * lock,UpdateState state)1965 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
1966     SnapshotUpdateStatus status = {};
1967     status.set_state(state);
1968     return WriteSnapshotUpdateStatus(lock, status);
1969 }
1970 
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)1971 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
1972                                                 const SnapshotUpdateStatus& status) {
1973     CHECK(lock);
1974     CHECK(lock->lock_mode() == LOCK_EX);
1975 
1976     std::string contents;
1977     if (!status.SerializeToString(&contents)) {
1978         LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
1979         return false;
1980     }
1981 
1982 #ifdef LIBSNAPSHOT_USE_HAL
1983     auto merge_status = MergeStatus::UNKNOWN;
1984     switch (status.state()) {
1985         // The needs-reboot and completed cases imply that /data and /metadata
1986         // can be safely wiped, so we don't report a merge status.
1987         case UpdateState::None:
1988         case UpdateState::MergeNeedsReboot:
1989         case UpdateState::MergeCompleted:
1990         case UpdateState::Initiated:
1991             merge_status = MergeStatus::NONE;
1992             break;
1993         case UpdateState::Unverified:
1994             merge_status = MergeStatus::SNAPSHOTTED;
1995             break;
1996         case UpdateState::Merging:
1997         case UpdateState::MergeFailed:
1998             merge_status = MergeStatus::MERGING;
1999             break;
2000         default:
2001             // Note that Cancelled flows to here - it is never written, since
2002             // it only communicates a transient state to the caller.
2003             LOG(ERROR) << "Unexpected update status: " << status.state();
2004             break;
2005     }
2006 
2007     bool set_before_write =
2008             merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2009     if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2010         return false;
2011     }
2012 #endif
2013 
2014     if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2015         PLOG(ERROR) << "Could not write to state file";
2016         return false;
2017     }
2018 
2019 #ifdef LIBSNAPSHOT_USE_HAL
2020     if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2021         return false;
2022     }
2023 #endif
2024     return true;
2025 }
2026 
GetSnapshotStatusFilePath(const std::string & name)2027 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2028     auto file = metadata_dir_ + "/snapshots/"s + name;
2029     return file;
2030 }
2031 
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2032 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2033                                          SnapshotStatus* status) {
2034     CHECK(lock);
2035     auto path = GetSnapshotStatusFilePath(name);
2036 
2037     unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2038     if (fd < 0) {
2039         PLOG(ERROR) << "Open failed: " << path;
2040         return false;
2041     }
2042 
2043     if (!status->ParseFromFileDescriptor(fd.get())) {
2044         PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2045         return false;
2046     }
2047 
2048     if (status->name() != name) {
2049         LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2050         status->set_name(name);
2051     }
2052 
2053     return true;
2054 }
2055 
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2056 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2057     // The caller must take an exclusive lock to modify snapshots.
2058     CHECK(lock);
2059     CHECK(lock->lock_mode() == LOCK_EX);
2060     CHECK(!status.name().empty());
2061 
2062     auto path = GetSnapshotStatusFilePath(status.name());
2063 
2064     std::string content;
2065     if (!status.SerializeToString(&content)) {
2066         LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2067         return false;
2068     }
2069 
2070     if (!WriteStringToFileAtomic(content, path)) {
2071         PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2072         return false;
2073     }
2074 
2075     return true;
2076 }
2077 
GetSnapshotDeviceName(const std::string & snapshot_name,const SnapshotStatus & status)2078 std::string SnapshotManager::GetSnapshotDeviceName(const std::string& snapshot_name,
2079                                                    const SnapshotStatus& status) {
2080     if (status.device_size() != status.snapshot_size()) {
2081         return GetSnapshotExtraDeviceName(snapshot_name);
2082     }
2083     return snapshot_name;
2084 }
2085 
EnsureImageManager()2086 bool SnapshotManager::EnsureImageManager() {
2087     if (images_) return true;
2088 
2089     // For now, use a preset timeout.
2090     images_ = android::fiemap::IImageManager::Open(gsid_dir_, 15000ms);
2091     if (!images_) {
2092         LOG(ERROR) << "Could not open ImageManager";
2093         return false;
2094     }
2095     return true;
2096 }
2097 
ForceLocalImageManager()2098 bool SnapshotManager::ForceLocalImageManager() {
2099     images_ = android::fiemap::ImageManager::Open(gsid_dir_);
2100     if (!images_) {
2101         LOG(ERROR) << "Could not open ImageManager";
2102         return false;
2103     }
2104     has_local_image_manager_ = true;
2105     return true;
2106 }
2107 
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2108 static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2109     auto& dm = DeviceMapper::Instance();
2110     std::vector<std::string> to_delete;
2111     for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2112         if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
2113             LOG(WARNING) << existing_cow_partition->name()
2114                          << " cannot be unmapped and its space cannot be reclaimed";
2115             continue;
2116         }
2117         to_delete.push_back(existing_cow_partition->name());
2118     }
2119     for (const auto& name : to_delete) {
2120         current_metadata->RemovePartition(name);
2121     }
2122 }
2123 
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2124 static Return AddRequiredSpace(Return orig,
2125                                const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2126     if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2127         return orig;
2128     }
2129     uint64_t sum = 0;
2130     for (auto&& [name, status] : all_snapshot_status) {
2131         sum += status.cow_file_size();
2132     }
2133     return Return::NoSpace(sum);
2134 }
2135 
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2136 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2137     auto lock = LockExclusive();
2138     if (!lock) return Return::Error();
2139 
2140     // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2141     // partition takes up a big chunk of space in super, causing COW images to be created on
2142     // retrofit Virtual A/B devices.
2143     if (device_->IsOverlayfsSetup()) {
2144         LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2145                    << ", reboot, then try again.";
2146         return Return::Error();
2147     }
2148 
2149     const auto& opener = device_->GetPartitionOpener();
2150     auto current_suffix = device_->GetSlotSuffix();
2151     uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2152     auto target_suffix = device_->GetOtherSlotSuffix();
2153     uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2154     auto current_super = device_->GetSuperDevice(current_slot);
2155 
2156     auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2157     if (current_metadata == nullptr) {
2158         LOG(ERROR) << "Cannot create metadata builder.";
2159         return Return::Error();
2160     }
2161 
2162     auto target_metadata =
2163             MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2164     if (target_metadata == nullptr) {
2165         LOG(ERROR) << "Cannot create target metadata builder.";
2166         return Return::Error();
2167     }
2168 
2169     // Delete partitions with target suffix in |current_metadata|. Otherwise,
2170     // partition_cow_creator recognizes these left-over partitions as used space.
2171     for (const auto& group_name : current_metadata->ListGroups()) {
2172         if (android::base::EndsWith(group_name, target_suffix)) {
2173             current_metadata->RemoveGroupAndPartitions(group_name);
2174         }
2175     }
2176 
2177     SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2178     if (!metadata_updater.Update()) {
2179         LOG(ERROR) << "Cannot calculate new metadata.";
2180         return Return::Error();
2181     }
2182 
2183     // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2184     // free regions.
2185     UnmapAndDeleteCowPartition(current_metadata.get());
2186 
2187     // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2188     // devices with retrofit dynamic partitions does not make sense.
2189     // This ensures that current_metadata->GetFreeRegions() uses the same device
2190     // indices as target_metadata (i.e. 0 -> "super").
2191     // This is also assumed in MapCowDevices() call below.
2192     CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2193           target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2194 
2195     std::map<std::string, SnapshotStatus> all_snapshot_status;
2196 
2197     // In case of error, automatically delete devices that are created along the way.
2198     // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2199     // these devices.
2200     AutoDeviceList created_devices;
2201 
2202     PartitionCowCreator cow_creator{
2203             .target_metadata = target_metadata.get(),
2204             .target_suffix = target_suffix,
2205             .target_partition = nullptr,
2206             .current_metadata = current_metadata.get(),
2207             .current_suffix = current_suffix,
2208             .operations = nullptr,
2209             .extra_extents = {},
2210     };
2211 
2212     auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2213                                              &all_snapshot_status);
2214     if (!ret.is_ok()) return ret;
2215 
2216     auto exported_target_metadata = target_metadata->Export();
2217     if (exported_target_metadata == nullptr) {
2218         LOG(ERROR) << "Cannot export target metadata";
2219         return Return::Error();
2220     }
2221 
2222     ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2223                                     exported_target_metadata.get(), target_suffix,
2224                                     all_snapshot_status);
2225     if (!ret.is_ok()) return ret;
2226 
2227     if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2228                               *exported_target_metadata, target_slot)) {
2229         LOG(ERROR) << "Cannot write target metadata";
2230         return Return::Error();
2231     }
2232 
2233     created_devices.Release();
2234     LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2235 
2236     return Return::Ok();
2237 }
2238 
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2239 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2240         LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2241         AutoDeviceList* created_devices,
2242         std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2243     CHECK(lock);
2244 
2245     auto* target_metadata = cow_creator->target_metadata;
2246     const auto& target_suffix = cow_creator->target_suffix;
2247 
2248     if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2249         LOG(ERROR) << "Cannot add group " << kCowGroupName;
2250         return Return::Error();
2251     }
2252 
2253     std::map<std::string, const RepeatedPtrField<InstallOperation>*> install_operation_map;
2254     std::map<std::string, std::vector<Extent>> extra_extents_map;
2255     for (const auto& partition_update : manifest.partitions()) {
2256         auto suffixed_name = partition_update.partition_name() + target_suffix;
2257         auto&& [it, inserted] =
2258                 install_operation_map.emplace(suffixed_name, &partition_update.operations());
2259         if (!inserted) {
2260             LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2261                        << " in update manifest.";
2262             return Return::Error();
2263         }
2264 
2265         auto& extra_extents = extra_extents_map[suffixed_name];
2266         if (partition_update.has_hash_tree_extent()) {
2267             extra_extents.push_back(partition_update.hash_tree_extent());
2268         }
2269         if (partition_update.has_fec_extent()) {
2270             extra_extents.push_back(partition_update.fec_extent());
2271         }
2272     }
2273 
2274     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2275         cow_creator->target_partition = target_partition;
2276         cow_creator->operations = nullptr;
2277         auto operations_it = install_operation_map.find(target_partition->name());
2278         if (operations_it != install_operation_map.end()) {
2279             cow_creator->operations = operations_it->second;
2280         }
2281 
2282         cow_creator->extra_extents.clear();
2283         auto extra_extents_it = extra_extents_map.find(target_partition->name());
2284         if (extra_extents_it != extra_extents_map.end()) {
2285             cow_creator->extra_extents = std::move(extra_extents_it->second);
2286         }
2287 
2288         // Compute the device sizes for the partition.
2289         auto cow_creator_ret = cow_creator->Run();
2290         if (!cow_creator_ret.has_value()) {
2291             return Return::Error();
2292         }
2293 
2294         LOG(INFO) << "For partition " << target_partition->name()
2295                   << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2296                   << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2297                   << ", cow partition size = "
2298                   << cow_creator_ret->snapshot_status.cow_partition_size()
2299                   << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2300 
2301         // Delete any existing snapshot before re-creating one.
2302         if (!DeleteSnapshot(lock, target_partition->name())) {
2303             LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2304                        << target_partition->name();
2305             return Return::Error();
2306         }
2307 
2308         // It is possible that the whole partition uses free space in super, and snapshot / COW
2309         // would not be needed. In this case, skip the partition.
2310         bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2311         bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2312                           cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2313         CHECK(needs_snapshot == needs_cow);
2314 
2315         if (!needs_snapshot) {
2316             LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2317                       << "because nothing needs to be snapshotted.";
2318             continue;
2319         }
2320 
2321         // Store these device sizes to snapshot status file.
2322         if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) {
2323             return Return::Error();
2324         }
2325         created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2326 
2327         // Create the COW partition. That is, use any remaining free space in super partition before
2328         // creating the COW images.
2329         if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2330             CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2331                     << "cow_partition_size == "
2332                     << cow_creator_ret->snapshot_status.cow_partition_size()
2333                     << " is not a multiple of sector size " << kSectorSize;
2334             auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2335                                                                kCowGroupName, 0 /* flags */);
2336             if (cow_partition == nullptr) {
2337                 return Return::Error();
2338             }
2339 
2340             if (!target_metadata->ResizePartition(
2341                         cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2342                         cow_creator_ret->cow_partition_usable_regions)) {
2343                 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2344                            << cow_creator_ret->snapshot_status.cow_partition_size();
2345                 return Return::Error();
2346             }
2347             // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2348             // error in the future.
2349         }
2350 
2351         all_snapshot_status->emplace(target_partition->name(),
2352                                      std::move(cow_creator_ret->snapshot_status));
2353 
2354         LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2355     }
2356 
2357     LOG(INFO) << "Allocating CoW images.";
2358 
2359     for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2360         // Create the backing COW image if necessary.
2361         if (snapshot_status.cow_file_size() > 0) {
2362             auto ret = CreateCowImage(lock, name);
2363             if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
2364         }
2365 
2366         LOG(INFO) << "Successfully created snapshot for " << name;
2367     }
2368 
2369     return Return::Ok();
2370 }
2371 
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2372 Return SnapshotManager::InitializeUpdateSnapshots(
2373         LockedFile* lock, MetadataBuilder* target_metadata,
2374         const LpMetadata* exported_target_metadata, const std::string& target_suffix,
2375         const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2376     CHECK(lock);
2377 
2378     CreateLogicalPartitionParams cow_params{
2379             .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
2380             .metadata = exported_target_metadata,
2381             .timeout_ms = std::chrono::milliseconds::max(),
2382             .partition_opener = &device_->GetPartitionOpener(),
2383     };
2384     for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2385         AutoDeviceList created_devices_for_cow;
2386 
2387         if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
2388             LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
2389                        << target_partition->name();
2390             return Return::Error();
2391         }
2392 
2393         auto it = all_snapshot_status.find(target_partition->name());
2394         if (it == all_snapshot_status.end()) continue;
2395         cow_params.partition_name = target_partition->name();
2396         std::string cow_name;
2397         if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
2398             return Return::Error();
2399         }
2400 
2401         std::string cow_path;
2402         if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
2403             LOG(ERROR) << "Cannot determine path for " << cow_name;
2404             return Return::Error();
2405         }
2406 
2407         auto ret = InitializeCow(cow_path);
2408         if (!ret.is_ok()) {
2409             LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
2410                        << cow_path;
2411             return AddRequiredSpace(ret, all_snapshot_status);
2412         }
2413         // Let destructor of created_devices_for_cow to unmap the COW devices.
2414     };
2415     return Return::Ok();
2416 }
2417 
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)2418 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
2419                                         std::string* snapshot_path) {
2420     auto lock = LockShared();
2421     if (!lock) return false;
2422     if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
2423         LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
2424                    << params.GetPartitionName();
2425         return false;
2426     }
2427     return MapPartitionWithSnapshot(lock.get(), params, snapshot_path);
2428 }
2429 
UnmapUpdateSnapshot(const std::string & target_partition_name)2430 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
2431     auto lock = LockShared();
2432     if (!lock) return false;
2433     return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
2434 }
2435 
UnmapAllPartitions()2436 bool SnapshotManager::UnmapAllPartitions() {
2437     auto lock = LockExclusive();
2438     if (!lock) return false;
2439 
2440     const auto& opener = device_->GetPartitionOpener();
2441     uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2442     auto super_device = device_->GetSuperDevice(slot);
2443     auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2444     if (!metadata) {
2445         LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2446         return false;
2447     }
2448 
2449     bool ok = true;
2450     for (const auto& partition : metadata->partitions) {
2451         auto partition_name = GetPartitionName(partition);
2452         ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
2453     }
2454     return ok;
2455 }
2456 
operator <<(std::ostream & os,SnapshotManager::Slot slot)2457 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
2458     switch (slot) {
2459         case SnapshotManager::Slot::Unknown:
2460             return os << "unknown";
2461         case SnapshotManager::Slot::Source:
2462             return os << "source";
2463         case SnapshotManager::Slot::Target:
2464             return os << "target";
2465     }
2466 }
2467 
Dump(std::ostream & os)2468 bool SnapshotManager::Dump(std::ostream& os) {
2469     // Don't actually lock. Dump() is for debugging purposes only, so it is okay
2470     // if it is racy.
2471     auto file = OpenLock(0 /* lock flag */);
2472     if (!file) return false;
2473 
2474     std::stringstream ss;
2475 
2476     ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
2477 
2478     ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
2479     ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
2480     ss << "Rollback indicator: "
2481        << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2482        << std::endl;
2483     ss << "Forward merge indicator: "
2484        << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2485        << std::endl;
2486 
2487     bool ok = true;
2488     std::vector<std::string> snapshots;
2489     if (!ListSnapshots(file.get(), &snapshots)) {
2490         LOG(ERROR) << "Could not list snapshots";
2491         snapshots.clear();
2492         ok = false;
2493     }
2494     for (const auto& name : snapshots) {
2495         ss << "Snapshot: " << name << std::endl;
2496         SnapshotStatus status;
2497         if (!ReadSnapshotStatus(file.get(), name, &status)) {
2498             ok = false;
2499             continue;
2500         }
2501         ss << "    state: " << SnapshotState_Name(status.state()) << std::endl;
2502         ss << "    device size (bytes): " << status.device_size() << std::endl;
2503         ss << "    snapshot size (bytes): " << status.snapshot_size() << std::endl;
2504         ss << "    cow partition size (bytes): " << status.cow_partition_size() << std::endl;
2505         ss << "    cow file size (bytes): " << status.cow_file_size() << std::endl;
2506         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
2507         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
2508     }
2509     os << ss.rdbuf();
2510     return ok;
2511 }
2512 
EnsureMetadataMounted()2513 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
2514     if (!device_->IsRecovery()) {
2515         // No need to mount anything in recovery.
2516         LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
2517         return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
2518     }
2519     auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
2520     if (ret == nullptr) return nullptr;
2521 
2522     // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
2523     // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
2524     // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
2525     // treat this case as if /metadata is not mounted.
2526     if (!LockShared()) {
2527         LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
2528                         "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
2529         return nullptr;
2530     }
2531     return ret;
2532 }
2533 
HandleImminentDataWipe(const std::function<void ()> & callback)2534 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
2535     if (!device_->IsRecovery()) {
2536         LOG(ERROR) << "Data wipes are only allowed in recovery.";
2537         return false;
2538     }
2539 
2540     auto mount = EnsureMetadataMounted();
2541     if (!mount || !mount->HasDevice()) {
2542         // We allow the wipe to continue, because if we can't mount /metadata,
2543         // it is unlikely the device would have booted anyway. If there is no
2544         // metadata partition, then the device predates Virtual A/B.
2545         return true;
2546     }
2547 
2548     // Check this early, so we don't accidentally start trying to populate
2549     // the state file in recovery. Note we don't call GetUpdateState since
2550     // we want errors in acquiring the lock to be propagated, instead of
2551     // returning UpdateState::None.
2552     auto state_file = GetStateFilePath();
2553     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2554         return true;
2555     }
2556 
2557     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2558     auto super_path = device_->GetSuperDevice(slot_number);
2559     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2560         LOG(ERROR) << "Unable to map partitions to complete merge.";
2561         return false;
2562     }
2563 
2564     auto process_callback = [&]() -> bool {
2565         if (callback) {
2566             callback();
2567         }
2568         return true;
2569     };
2570 
2571     in_factory_data_reset_ = true;
2572     bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
2573     in_factory_data_reset_ = false;
2574 
2575     if (!ok) {
2576         return false;
2577     }
2578 
2579     // Nothing should be depending on partitions now, so unmap them all.
2580     if (!UnmapAllPartitions()) {
2581         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2582     }
2583     return true;
2584 }
2585 
FinishMergeInRecovery()2586 bool SnapshotManager::FinishMergeInRecovery() {
2587     if (!device_->IsRecovery()) {
2588         LOG(ERROR) << "Data wipes are only allowed in recovery.";
2589         return false;
2590     }
2591 
2592     auto mount = EnsureMetadataMounted();
2593     if (!mount || !mount->HasDevice()) {
2594         return false;
2595     }
2596 
2597     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2598     auto super_path = device_->GetSuperDevice(slot_number);
2599     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2600         LOG(ERROR) << "Unable to map partitions to complete merge.";
2601         return false;
2602     }
2603 
2604     UpdateState state = ProcessUpdateState();
2605     if (state != UpdateState::MergeCompleted) {
2606         LOG(ERROR) << "Merge returned unexpected status: " << state;
2607         return false;
2608     }
2609 
2610     // Nothing should be depending on partitions now, so unmap them all.
2611     if (!UnmapAllPartitions()) {
2612         LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2613     }
2614     return true;
2615 }
2616 
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)2617 bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
2618                                                    const std::function<bool()>& callback) {
2619     auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2620     UpdateState state = ProcessUpdateState(callback);
2621     LOG(INFO) << "Update state in recovery: " << state;
2622     switch (state) {
2623         case UpdateState::MergeFailed:
2624             LOG(ERROR) << "Unrecoverable merge failure detected.";
2625             return false;
2626         case UpdateState::Unverified: {
2627             // If an OTA was just applied but has not yet started merging:
2628             //
2629             // - if forward merge is allowed, initiate merge and call
2630             // ProcessUpdateState again.
2631             //
2632             // - if forward merge is not allowed, we
2633             // have no choice but to revert slots, because the current slot will
2634             // immediately become unbootable. Rather than wait for the device
2635             // to reboot N times until a rollback, we proactively disable the
2636             // new slot instead.
2637             //
2638             // Since the rollback is inevitable, we don't treat a HAL failure
2639             // as an error here.
2640             auto slot = GetCurrentSlot();
2641             if (slot == Slot::Target) {
2642                 if (allow_forward_merge &&
2643                     access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
2644                     LOG(INFO) << "Forward merge allowed, initiating merge now.";
2645                     return InitiateMerge() &&
2646                            ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
2647                 }
2648 
2649                 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
2650                 device_->SetSlotAsUnbootable(slot_number);
2651             } else {
2652                 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
2653             }
2654             break;
2655         }
2656         case UpdateState::MergeNeedsReboot:
2657             // We shouldn't get here, because nothing is depending on
2658             // logical partitions.
2659             LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
2660             break;
2661         default:
2662             break;
2663     }
2664     return true;
2665 }
2666 
EnsureNoOverflowSnapshot(LockedFile * lock)2667 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
2668     CHECK(lock);
2669 
2670     std::vector<std::string> snapshots;
2671     if (!ListSnapshots(lock, &snapshots)) {
2672         LOG(ERROR) << "Could not list snapshots.";
2673         return false;
2674     }
2675 
2676     auto& dm = DeviceMapper::Instance();
2677     for (const auto& snapshot : snapshots) {
2678         std::vector<DeviceMapper::TargetInfo> targets;
2679         if (!dm.GetTableStatus(snapshot, &targets)) {
2680             LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
2681             return false;
2682         }
2683         if (targets.size() != 1) {
2684             LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
2685                        << ", size = " << targets.size();
2686             return false;
2687         }
2688         if (targets[0].IsOverflowSnapshot()) {
2689             LOG(ERROR) << "Detected overflow in snapshot " << snapshot
2690                        << ", CoW device size computation is wrong!";
2691             return false;
2692         }
2693     }
2694 
2695     return true;
2696 }
2697 
RecoveryCreateSnapshotDevices()2698 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
2699     if (!device_->IsRecovery()) {
2700         LOG(ERROR) << __func__ << " is only allowed in recovery.";
2701         return CreateResult::NOT_CREATED;
2702     }
2703 
2704     auto mount = EnsureMetadataMounted();
2705     if (!mount || !mount->HasDevice()) {
2706         LOG(ERROR) << "Couldn't mount Metadata.";
2707         return CreateResult::NOT_CREATED;
2708     }
2709     return RecoveryCreateSnapshotDevices(mount);
2710 }
2711 
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)2712 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
2713         const std::unique_ptr<AutoDevice>& metadata_device) {
2714     if (!device_->IsRecovery()) {
2715         LOG(ERROR) << __func__ << " is only allowed in recovery.";
2716         return CreateResult::NOT_CREATED;
2717     }
2718 
2719     if (metadata_device == nullptr || !metadata_device->HasDevice()) {
2720         LOG(ERROR) << "Metadata not mounted.";
2721         return CreateResult::NOT_CREATED;
2722     }
2723 
2724     auto state_file = GetStateFilePath();
2725     if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2726         LOG(ERROR) << "Couldn't access state file.";
2727         return CreateResult::NOT_CREATED;
2728     }
2729 
2730     if (!NeedSnapshotsInFirstStageMount()) {
2731         return CreateResult::NOT_CREATED;
2732     }
2733 
2734     auto slot_suffix = device_->GetOtherSlotSuffix();
2735     auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2736     auto super_path = device_->GetSuperDevice(slot_number);
2737     if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2738         LOG(ERROR) << "Unable to map partitions.";
2739         return CreateResult::ERROR;
2740     }
2741     return CreateResult::CREATED;
2742 }
2743 
UpdateForwardMergeIndicator(bool wipe)2744 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
2745     auto path = GetForwardMergeIndicatorPath();
2746 
2747     if (!wipe) {
2748         LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
2749         return RemoveFileIfExists(path);
2750     }
2751 
2752     // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
2753 
2754     LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
2755     if (!android::base::WriteStringToFile("1", path)) {
2756         PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
2757         return false;
2758     }
2759 
2760     return true;
2761 }
2762 
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)2763 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
2764                                                        std::string* device_string_or_mapped_path) {
2765     auto& dm = DeviceMapper::Instance();
2766     // Try getting the device string if it is a device mapper device.
2767     if (dm.GetState(device_name) != DmDeviceState::INVALID) {
2768         return dm.GetDeviceString(device_name, device_string_or_mapped_path);
2769     }
2770 
2771     // Otherwise, get path from IImageManager.
2772     if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
2773         return false;
2774     }
2775 
2776     LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
2777                  << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
2778                  << "may not be available in first stage init! ";
2779     return true;
2780 }
2781 
2782 }  // namespace snapshot
2783 }  // namespace android
2784