1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52
53 namespace android {
54 namespace snapshot {
55
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85
// Fixed locations of the snapshot-boot and rollback indicator files under
// /metadata/ota.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";

// Interval between update state checks (two seconds).
static constexpr auto kUpdateStateCheckInterval = std::chrono::seconds{2};
89
90 MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status);
91
92 // Note: IImageManager is an incomplete type in the header, so the default
93 // destructor doesn't work.
~SnapshotManager()94 SnapshotManager::~SnapshotManager() {}
95
New(IDeviceInfo * info)96 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
97 if (!info) {
98 info = new DeviceInfo();
99 }
100
101 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
102 }
103
NewForFirstStageMount(IDeviceInfo * info)104 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
105 if (!info) {
106 DeviceInfo* impl = new DeviceInfo();
107 impl->set_first_stage_init(true);
108 info = impl;
109 }
110 auto sm = New(info);
111
112 // The first-stage version of snapuserd is explicitly started by init. Do
113 // not attempt to using it during tests (which run in normal AOSP).
114 if (!sm->device()->IsTestDevice()) {
115 sm->use_first_stage_snapuserd_ = true;
116 }
117 return sm;
118 }
119
SnapshotManager(IDeviceInfo * device)120 SnapshotManager::SnapshotManager(IDeviceInfo* device)
121 : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {
122 merge_consistency_checker_ = android::snapshot::CheckMergeConsistency;
123 }
124
// Returns the name used for the COW device of |snapshot_name|
// (the snapshot name with a "-cow" suffix).
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
128
GetSnapshotDriver(LockedFile * lock)129 SnapshotManager::SnapshotDriver SnapshotManager::GetSnapshotDriver(LockedFile* lock) {
130 if (UpdateUsesUserSnapshots(lock)) {
131 return SnapshotManager::SnapshotDriver::DM_USER;
132 } else {
133 return SnapshotManager::SnapshotDriver::DM_SNAPSHOT;
134 }
135 }
136
GetDmUserCowName(const std::string & snapshot_name,SnapshotManager::SnapshotDriver driver)137 static std::string GetDmUserCowName(const std::string& snapshot_name,
138 SnapshotManager::SnapshotDriver driver) {
139 // dm-user block device will act as a snapshot device. We identify it with
140 // the same partition name so that when partitions can be mounted off
141 // dm-user.
142
143 switch (driver) {
144 case SnapshotManager::SnapshotDriver::DM_USER: {
145 return snapshot_name;
146 }
147
148 case SnapshotManager::SnapshotDriver::DM_SNAPSHOT: {
149 return snapshot_name + "-user-cow";
150 }
151
152 default: {
153 LOG(ERROR) << "Invalid snapshot driver";
154 return "";
155 }
156 }
157 }
158
// Returns the name of the COW backing image for |snapshot_name|
// (the snapshot name with a "-cow-img" suffix).
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name = snapshot_name;
    image_name += "-cow-img";
    return image_name;
}
162
// Returns the name of the base device for |partition_name|
// (the partition name with a "-base" suffix).
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name = partition_name;
    base_name += "-base";
    return base_name;
}
166
// Returns the name of the source device for |partition_name|
// (the partition name with a "-src" suffix).
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name = partition_name;
    source_name += "-src";
    return source_name;
}
170
BeginUpdate()171 bool SnapshotManager::BeginUpdate() {
172 bool needs_merge = false;
173 if (!TryCancelUpdate(&needs_merge)) {
174 return false;
175 }
176 if (needs_merge) {
177 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
178 auto state = ProcessUpdateState();
179 LOG(INFO) << "Merged with state = " << state;
180 }
181
182 auto file = LockExclusive();
183 if (!file) return false;
184
185 // Purge the ImageManager just in case there is a corrupt lp_metadata file
186 // lying around. (NB: no need to return false on an error, we can let the
187 // update try to progress.)
188 if (EnsureImageManager()) {
189 images_->RemoveAllImages();
190 }
191
192 // Clear any cached metadata (this allows re-using one manager across tests).
193 old_partition_metadata_ = nullptr;
194
195 auto state = ReadUpdateState(file.get());
196 if (state != UpdateState::None) {
197 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
198 return false;
199 }
200 return WriteUpdateState(file.get(), UpdateState::Initiated);
201 }
202
CancelUpdate()203 bool SnapshotManager::CancelUpdate() {
204 bool needs_merge = false;
205 if (!TryCancelUpdate(&needs_merge)) {
206 return false;
207 }
208 if (needs_merge) {
209 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
210 }
211 return !needs_merge;
212 }
213
TryCancelUpdate(bool * needs_merge)214 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
215 *needs_merge = false;
216
217 auto file = LockExclusive();
218 if (!file) return false;
219
220 UpdateState state = ReadUpdateState(file.get());
221 if (state == UpdateState::None) {
222 RemoveInvalidSnapshots(file.get());
223 return true;
224 }
225
226 if (state == UpdateState::Initiated) {
227 LOG(INFO) << "Update has been initiated, now canceling";
228 return RemoveAllUpdateState(file.get());
229 }
230
231 if (state == UpdateState::Unverified) {
232 // We completed an update, but it can still be canceled if we haven't booted into it.
233 auto slot = GetCurrentSlot();
234 if (slot != Slot::Target) {
235 LOG(INFO) << "Canceling previously completed updates (if any)";
236 return RemoveAllUpdateState(file.get());
237 }
238 }
239 *needs_merge = true;
240 return true;
241 }
242
ReadUpdateSourceSlotSuffix()243 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
244 auto boot_file = GetSnapshotBootIndicatorPath();
245 std::string contents;
246 if (!android::base::ReadFileToString(boot_file, &contents)) {
247 PLOG(WARNING) << "Cannot read " << boot_file;
248 return {};
249 }
250 return contents;
251 }
252
GetCurrentSlot()253 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
254 auto contents = ReadUpdateSourceSlotSuffix();
255 if (contents.empty()) {
256 return Slot::Unknown;
257 }
258 if (device_->GetSlotSuffix() == contents) {
259 return Slot::Source;
260 }
261 return Slot::Target;
262 }
263
GetSnapshotSlotSuffix()264 std::string SnapshotManager::GetSnapshotSlotSuffix() {
265 switch (GetCurrentSlot()) {
266 case Slot::Target:
267 return device_->GetSlotSuffix();
268 default:
269 return device_->GetOtherSlotSuffix();
270 }
271 }
272
RemoveFileIfExists(const std::string & path)273 static bool RemoveFileIfExists(const std::string& path) {
274 std::string message;
275 if (!android::base::RemoveFileIfExists(path, &message)) {
276 LOG(ERROR) << "Remove failed: " << path << ": " << message;
277 return false;
278 }
279 return true;
280 }
281
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)282 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
283 if (prolog && !prolog()) {
284 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
285 return false;
286 }
287
288 LOG(INFO) << "Removing all update state.";
289
290 if (!RemoveAllSnapshots(lock)) {
291 LOG(ERROR) << "Could not remove all snapshots";
292 return false;
293 }
294
295 // It's okay if these fail:
296 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
297 // reading the indicator file, so it's not a problem if it still exists
298 // after the update completes.
299 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
300 // matches the incoming update.
301 std::vector<std::string> files = {
302 GetSnapshotBootIndicatorPath(),
303 GetRollbackIndicatorPath(),
304 GetForwardMergeIndicatorPath(),
305 GetOldPartitionMetadataPath(),
306 };
307 for (const auto& file : files) {
308 RemoveFileIfExists(file);
309 }
310
311 // If this fails, we'll keep trying to remove the update state (as the
312 // device reboots or starts a new update) until it finally succeeds.
313 return WriteUpdateState(lock, UpdateState::None);
314 }
315
FinishedSnapshotWrites(bool wipe)316 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
317 auto lock = LockExclusive();
318 if (!lock) return false;
319
320 auto update_state = ReadUpdateState(lock.get());
321 if (update_state == UpdateState::Unverified) {
322 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
323 return true;
324 }
325
326 if (update_state != UpdateState::Initiated) {
327 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
328 return false;
329 }
330
331 if (!EnsureNoOverflowSnapshot(lock.get())) {
332 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
333 return false;
334 }
335
336 if (!UpdateForwardMergeIndicator(wipe)) {
337 return false;
338 }
339
340 // This file is written on boot to detect whether a rollback occurred. It
341 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
342 // snapshots too early.
343 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
344 return false;
345 }
346
347 // This file acts as both a quick indicator for init (it can use access(2)
348 // to decide how to do first-stage mounts), and it stores the old slot, so
349 // we can tell whether or not we performed a rollback.
350 auto contents = device_->GetSlotSuffix();
351 auto boot_file = GetSnapshotBootIndicatorPath();
352 if (!WriteStringToFileAtomic(contents, boot_file)) {
353 PLOG(ERROR) << "write failed: " << boot_file;
354 return false;
355 }
356 return WriteUpdateState(lock.get(), UpdateState::Unverified);
357 }
358
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)359 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
360 SnapshotStatus* status) {
361 CHECK(lock);
362 CHECK(lock->lock_mode() == LOCK_EX);
363 CHECK(status);
364
365 if (status->name().empty()) {
366 LOG(ERROR) << "SnapshotStatus has no name.";
367 return false;
368 }
369 // Check these sizes. Like liblp, we guarantee the partition size is
370 // respected, which means it has to be sector-aligned. (This guarantee is
371 // useful for locating avb footers correctly). The COW file size, however,
372 // can be arbitrarily larger than specified, so we can safely round it up.
373 if (status->device_size() % kSectorSize != 0) {
374 LOG(ERROR) << "Snapshot " << status->name()
375 << " device size is not a multiple of the sector size: "
376 << status->device_size();
377 return false;
378 }
379 if (status->snapshot_size() % kSectorSize != 0) {
380 LOG(ERROR) << "Snapshot " << status->name()
381 << " snapshot size is not a multiple of the sector size: "
382 << status->snapshot_size();
383 return false;
384 }
385 if (status->cow_partition_size() % kSectorSize != 0) {
386 LOG(ERROR) << "Snapshot " << status->name()
387 << " cow partition size is not a multiple of the sector size: "
388 << status->cow_partition_size();
389 return false;
390 }
391 if (status->cow_file_size() % kSectorSize != 0) {
392 LOG(ERROR) << "Snapshot " << status->name()
393 << " cow file size is not a multiple of the sector size: "
394 << status->cow_file_size();
395 return false;
396 }
397
398 status->set_state(SnapshotState::CREATED);
399 status->set_sectors_allocated(0);
400 status->set_metadata_sectors(0);
401 status->set_compression_enabled(cow_creator->compression_enabled);
402 status->set_compression_algorithm(cow_creator->compression_algorithm);
403
404 if (!WriteSnapshotStatus(lock, *status)) {
405 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
406 return false;
407 }
408 return true;
409 }
410
CreateCowImage(LockedFile * lock,const std::string & name)411 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
412 CHECK(lock);
413 CHECK(lock->lock_mode() == LOCK_EX);
414 if (!EnsureImageManager()) return Return::Error();
415
416 SnapshotStatus status;
417 if (!ReadSnapshotStatus(lock, name, &status)) {
418 return Return::Error();
419 }
420
421 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
422 if (status.cow_file_size() % kSectorSize != 0) {
423 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
424 << status.cow_file_size();
425 return Return::Error();
426 }
427
428 std::string cow_image_name = GetCowImageDeviceName(name);
429 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
430 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
431 }
432
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::string & base_path_merge,const std::chrono::milliseconds & timeout_ms,std::string * path)433 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
434 const std::string& cow_file, const std::string& base_device,
435 const std::string& base_path_merge,
436 const std::chrono::milliseconds& timeout_ms, std::string* path) {
437 CHECK(lock);
438
439 if (UpdateUsesUserSnapshots(lock)) {
440 SnapshotStatus status;
441 if (!ReadSnapshotStatus(lock, name, &status)) {
442 LOG(ERROR) << "MapDmUserCow: ReadSnapshotStatus failed...";
443 return false;
444 }
445
446 if (status.state() == SnapshotState::NONE ||
447 status.state() == SnapshotState::MERGE_COMPLETED) {
448 LOG(ERROR) << "Should not create a snapshot device for " << name
449 << " after merging has completed.";
450 return false;
451 }
452
453 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
454 if (update_status.state() == UpdateState::MergeCompleted ||
455 update_status.state() == UpdateState::MergeNeedsReboot) {
456 LOG(ERROR) << "Should not create a snapshot device for " << name
457 << " after global merging has completed.";
458 return false;
459 }
460 }
461
462 // Use an extra decoration for first-stage init, so we can transition
463 // to a new table entry in second-stage.
464 std::string misc_name = name;
465 if (use_first_stage_snapuserd_) {
466 misc_name += "-init";
467 }
468
469 if (!EnsureSnapuserdConnected()) {
470 return false;
471 }
472
473 uint64_t base_sectors = 0;
474 if (!UpdateUsesUserSnapshots(lock)) {
475 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
476 if (base_sectors == 0) {
477 LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
478 return false;
479 }
480 } else {
481 // For userspace snapshots, the size of the base device is taken as the
482 // size of the dm-user block device. Since there is no pseudo mapping
483 // created in the daemon, we no longer need to rely on the daemon for
484 // sizing the dm-user block device.
485 unique_fd fd(TEMP_FAILURE_RETRY(open(base_path_merge.c_str(), O_RDONLY | O_CLOEXEC)));
486 if (fd < 0) {
487 LOG(ERROR) << "Cannot open block device: " << base_path_merge;
488 return false;
489 }
490
491 uint64_t dev_sz = get_block_device_size(fd.get());
492 if (!dev_sz) {
493 LOG(ERROR) << "Failed to find block device size: " << base_path_merge;
494 return false;
495 }
496
497 base_sectors = dev_sz >> 9;
498 }
499
500 DmTable table;
501 table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
502 if (!dm_.CreateDevice(name, table, path, timeout_ms)) {
503 LOG(ERROR) << " dm-user: CreateDevice failed... ";
504 return false;
505 }
506 if (!WaitForDevice(*path, timeout_ms)) {
507 LOG(ERROR) << " dm-user: timeout: Failed to create block device for: " << name;
508 return false;
509 }
510
511 auto control_device = "/dev/dm-user/" + misc_name;
512 if (!WaitForDevice(control_device, timeout_ms)) {
513 return false;
514 }
515
516 if (UpdateUsesUserSnapshots(lock)) {
517 // Now that the dm-user device is created, initialize the daemon and
518 // spin up the worker threads.
519 if (!snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device, base_path_merge)) {
520 LOG(ERROR) << "InitDmUserCow failed";
521 return false;
522 }
523 }
524
525 return snapuserd_client_->AttachDmUser(misc_name);
526 }
527
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)528 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
529 const std::string& base_device, const std::string& cow_device,
530 const std::chrono::milliseconds& timeout_ms,
531 std::string* dev_path) {
532 CHECK(lock);
533
534 SnapshotStatus status;
535 if (!ReadSnapshotStatus(lock, name, &status)) {
536 return false;
537 }
538 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
539 LOG(ERROR) << "Should not create a snapshot device for " << name
540 << " after merging has completed.";
541 return false;
542 }
543
544 // Validate the block device size, as well as the requested snapshot size.
545 // Note that during first-stage init, we don't have the device paths.
546 if (android::base::StartsWith(base_device, "/")) {
547 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
548 if (fd < 0) {
549 PLOG(ERROR) << "open failed: " << base_device;
550 return false;
551 }
552 auto dev_size = get_block_device_size(fd);
553 if (!dev_size) {
554 PLOG(ERROR) << "Could not determine block device size: " << base_device;
555 return false;
556 }
557 if (status.device_size() != dev_size) {
558 LOG(ERROR) << "Block device size for " << base_device << " does not match"
559 << "(expected " << status.device_size() << ", got " << dev_size << ")";
560 return false;
561 }
562 }
563 if (status.device_size() % kSectorSize != 0) {
564 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
565 return false;
566 }
567 if (status.snapshot_size() % kSectorSize != 0 ||
568 status.snapshot_size() > status.device_size()) {
569 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
570 return false;
571 }
572 if (status.device_size() != status.snapshot_size()) {
573 LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
574 << status.device_size() << ", snapshot size = " << status.snapshot_size();
575 return false;
576 }
577
578 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
579
580 // Note that merging is a global state. We do track whether individual devices
581 // have completed merging, but the start of the merge process is considered
582 // atomic.
583 SnapshotStorageMode mode;
584 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
585 switch (update_status.state()) {
586 case UpdateState::MergeCompleted:
587 case UpdateState::MergeNeedsReboot:
588 LOG(ERROR) << "Should not create a snapshot device for " << name
589 << " after global merging has completed.";
590 return false;
591 case UpdateState::Merging:
592 case UpdateState::MergeFailed:
593 // Note: MergeFailed indicates that a merge is in progress, but
594 // is possibly stalled. We still have to honor the merge.
595 if (DecideMergePhase(status) == update_status.merge_phase()) {
596 mode = SnapshotStorageMode::Merge;
597 } else {
598 mode = SnapshotStorageMode::Persistent;
599 }
600 break;
601 default:
602 mode = SnapshotStorageMode::Persistent;
603 break;
604 }
605
606 if (mode == SnapshotStorageMode::Persistent && status.state() == SnapshotState::MERGING) {
607 LOG(ERROR) << "Snapshot: " << name
608 << " has snapshot status Merging but mode set to Persistent."
609 << " Changing mode to Snapshot-Merge.";
610 mode = SnapshotStorageMode::Merge;
611 }
612
613 DmTable table;
614 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
615 kSnapshotChunkSize);
616 if (!dm_.CreateDevice(name, table, dev_path, timeout_ms)) {
617 LOG(ERROR) << "Could not create snapshot device: " << name;
618 return false;
619 }
620 return true;
621 }
622
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)623 std::optional<std::string> SnapshotManager::MapCowImage(
624 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
625 if (!EnsureImageManager()) return std::nullopt;
626 auto cow_image_name = GetCowImageDeviceName(name);
627
628 bool ok;
629 std::string cow_dev;
630 if (device_->IsRecovery() || device_->IsFirstStageInit()) {
631 const auto& opener = device_->GetPartitionOpener();
632 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
633 } else {
634 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
635 }
636
637 if (ok) {
638 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
639 return cow_dev;
640 }
641 LOG(ERROR) << "Could not map image device: " << cow_image_name;
642 return std::nullopt;
643 }
644
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)645 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
646 const std::chrono::milliseconds& timeout_ms,
647 std::string* path) {
648 CHECK(lock);
649
650 auto metadata = ReadOldPartitionMetadata(lock);
651 if (!metadata) {
652 LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
653 return false;
654 }
655
656 auto old_name = GetOtherPartitionName(name);
657 auto slot_suffix = device_->GetSlotSuffix();
658 auto slot = SlotNumberForSlotSuffix(slot_suffix);
659
660 CreateLogicalPartitionParams params = {
661 .block_device = device_->GetSuperDevice(slot),
662 .metadata = metadata,
663 .partition_name = old_name,
664 .timeout_ms = timeout_ms,
665 .device_name = GetSourceDeviceName(name),
666 .partition_opener = &device_->GetPartitionOpener(),
667 };
668 if (!CreateLogicalPartition(std::move(params), path)) {
669 LOG(ERROR) << "Could not create source device for snapshot " << name;
670 return false;
671 }
672 return true;
673 }
674
UnmapSnapshot(LockedFile * lock,const std::string & name)675 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
676 CHECK(lock);
677
678 if (UpdateUsesUserSnapshots(lock)) {
679 if (!UnmapUserspaceSnapshotDevice(lock, name)) {
680 return false;
681 }
682 } else {
683 if (!DeleteDeviceIfExists(name)) {
684 LOG(ERROR) << "Could not delete snapshot device: " << name;
685 return false;
686 }
687 }
688 return true;
689 }
690
UnmapCowImage(const std::string & name)691 bool SnapshotManager::UnmapCowImage(const std::string& name) {
692 if (!EnsureImageManager()) return false;
693 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
694 }
695
DeleteSnapshot(LockedFile * lock,const std::string & name)696 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
697 CHECK(lock);
698 CHECK(lock->lock_mode() == LOCK_EX);
699 if (!EnsureImageManager()) return false;
700
701 if (!UnmapCowDevices(lock, name)) {
702 return false;
703 }
704
705 // We can't delete snapshots in recovery. The only way we'd try is it we're
706 // completing or canceling a merge in preparation for a data wipe, in which
707 // case, we don't care if the file sticks around.
708 if (device_->IsRecovery()) {
709 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
710 return true;
711 }
712
713 auto cow_image_name = GetCowImageDeviceName(name);
714 if (images_->BackingImageExists(cow_image_name)) {
715 if (!images_->DeleteBackingImage(cow_image_name)) {
716 return false;
717 }
718 }
719
720 std::string error;
721 auto file_path = GetSnapshotStatusFilePath(name);
722 if (!android::base::RemoveFileIfExists(file_path, &error)) {
723 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
724 return false;
725 }
726 return true;
727 }
728
InitiateMerge()729 bool SnapshotManager::InitiateMerge() {
730 auto lock = LockExclusive();
731 if (!lock) return false;
732
733 UpdateState state = ReadUpdateState(lock.get());
734 if (state != UpdateState::Unverified) {
735 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
736 return false;
737 }
738
739 auto slot = GetCurrentSlot();
740 if (slot != Slot::Target) {
741 LOG(ERROR) << "Device cannot merge while not booting from new slot";
742 return false;
743 }
744
745 std::vector<std::string> snapshots;
746 if (!ListSnapshots(lock.get(), &snapshots)) {
747 LOG(ERROR) << "Could not list snapshots";
748 return false;
749 }
750
751 auto other_suffix = device_->GetOtherSlotSuffix();
752
753 for (const auto& snapshot : snapshots) {
754 if (android::base::EndsWith(snapshot, other_suffix)) {
755 // Allow the merge to continue, but log this unexpected case.
756 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
757 continue;
758 }
759
760 // The device has to be mapped, since everything should be merged at
761 // the same time. This is a fairly serious error. We could forcefully
762 // map everything here, but it should have been mapped during first-
763 // stage init.
764 if (dm_.GetState(snapshot) == DmDeviceState::INVALID) {
765 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
766 return false;
767 }
768 }
769
770 auto metadata = ReadCurrentMetadata();
771 for (auto it = snapshots.begin(); it != snapshots.end();) {
772 switch (GetMetadataPartitionState(*metadata, *it)) {
773 case MetadataPartitionState::Flashed:
774 LOG(WARNING) << "Detected re-flashing for partition " << *it
775 << ". Skip merging it.";
776 [[fallthrough]];
777 case MetadataPartitionState::None: {
778 LOG(WARNING) << "Deleting snapshot for partition " << *it;
779 if (!DeleteSnapshot(lock.get(), *it)) {
780 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
781 << ". Skip merging it anyways.";
782 }
783 it = snapshots.erase(it);
784 } break;
785 case MetadataPartitionState::Updated: {
786 ++it;
787 } break;
788 }
789 }
790
791 bool compression_enabled = false;
792
793 std::vector<std::string> first_merge_group;
794
795 DmTargetSnapshot::Status initial_target_values = {};
796 for (const auto& snapshot : snapshots) {
797 if (!UpdateUsesUserSnapshots(lock.get())) {
798 DmTargetSnapshot::Status current_status;
799 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
800 return false;
801 }
802 initial_target_values.sectors_allocated += current_status.sectors_allocated;
803 initial_target_values.total_sectors += current_status.total_sectors;
804 initial_target_values.metadata_sectors += current_status.metadata_sectors;
805 }
806
807 SnapshotStatus snapshot_status;
808 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
809 return false;
810 }
811
812 compression_enabled |= snapshot_status.compression_enabled();
813 if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
814 first_merge_group.emplace_back(snapshot);
815 }
816 }
817
818 SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
819 initial_status.set_state(UpdateState::Merging);
820 initial_status.set_compression_enabled(compression_enabled);
821
822 if (!UpdateUsesUserSnapshots(lock.get())) {
823 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
824 initial_status.set_total_sectors(initial_target_values.total_sectors);
825 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
826 }
827
828 // If any partitions shrunk, we need to merge them before we merge any other
829 // partitions (see b/177935716). Otherwise, a merge from another partition
830 // may overwrite the source block of a copy operation.
831 const std::vector<std::string>* merge_group;
832 if (first_merge_group.empty()) {
833 merge_group = &snapshots;
834 initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
835 } else {
836 merge_group = &first_merge_group;
837 initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
838 }
839
840 // Point of no return - mark that we're starting a merge. From now on every
841 // eligible snapshot must be a merge target.
842 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
843 return false;
844 }
845
846 auto reported_code = MergeFailureCode::Ok;
847 for (const auto& snapshot : *merge_group) {
848 // If this fails, we have no choice but to continue. Everything must
849 // be merged. This is not an ideal state to be in, but it is safe,
850 // because we the next boot will try again.
851 auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
852 if (code != MergeFailureCode::Ok) {
853 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
854 if (reported_code == MergeFailureCode::Ok) {
855 reported_code = code;
856 }
857 }
858 }
859
860 // If we couldn't switch everything to a merge target, pre-emptively mark
861 // this merge as failed. It will get acknowledged when WaitForMerge() is
862 // called.
863 if (reported_code != MergeFailureCode::Ok) {
864 WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
865 }
866
867 // Return true no matter what, because a merge was initiated.
868 return true;
869 }
870
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)871 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
872 SnapshotStatus status;
873 if (!ReadSnapshotStatus(lock, name, &status)) {
874 return MergeFailureCode::ReadStatus;
875 }
876 if (status.state() != SnapshotState::CREATED) {
877 LOG(WARNING) << "Snapshot " << name
878 << " has unexpected state: " << SnapshotState_Name(status.state());
879 }
880
881 if (UpdateUsesUserSnapshots(lock)) {
882 if (EnsureSnapuserdConnected()) {
883 // This is the point where we inform the daemon to initiate/resume
884 // the merge
885 if (!snapuserd_client_->InitiateMerge(name)) {
886 return MergeFailureCode::UnknownTable;
887 }
888 } else {
889 LOG(ERROR) << "Failed to connect to snapuserd daemon to initiate merge";
890 return MergeFailureCode::UnknownTable;
891 }
892 } else {
893 // After this, we return true because we technically did switch to a merge
894 // target. Everything else we do here is just informational.
895 if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
896 return code;
897 }
898 }
899
900 status.set_state(SnapshotState::MERGING);
901
902 if (!UpdateUsesUserSnapshots(lock)) {
903 DmTargetSnapshot::Status dm_status;
904 if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
905 LOG(ERROR) << "Could not query merge status for snapshot: " << name;
906 }
907 status.set_sectors_allocated(dm_status.sectors_allocated);
908 status.set_metadata_sectors(dm_status.metadata_sectors);
909 }
910
911 if (!WriteSnapshotStatus(lock, status)) {
912 LOG(ERROR) << "Could not update status file for snapshot: " << name;
913 }
914 return MergeFailureCode::Ok;
915 }
916
RewriteSnapshotDeviceTable(const std::string & name)917 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
918 std::vector<DeviceMapper::TargetInfo> old_targets;
919 if (!dm_.GetTableInfo(name, &old_targets)) {
920 LOG(ERROR) << "Could not read snapshot device table: " << name;
921 return MergeFailureCode::GetTableInfo;
922 }
923 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
924 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
925 return MergeFailureCode::UnknownTable;
926 }
927
928 std::string base_device, cow_device;
929 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
930 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
931 return MergeFailureCode::GetTableParams;
932 }
933
934 DmTable table;
935 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
936 SnapshotStorageMode::Merge, kSnapshotChunkSize);
937 if (!dm_.LoadTableAndActivate(name, table)) {
938 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
939 return MergeFailureCode::ActivateNewTable;
940 }
941 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
942 return MergeFailureCode::Ok;
943 }
944
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)945 bool SnapshotManager::GetSingleTarget(const std::string& dm_name, TableQuery query,
946 DeviceMapper::TargetInfo* target) {
947 if (dm_.GetState(dm_name) == DmDeviceState::INVALID) {
948 return false;
949 }
950
951 std::vector<DeviceMapper::TargetInfo> targets;
952 bool result;
953 if (query == TableQuery::Status) {
954 result = dm_.GetTableStatus(dm_name, &targets);
955 } else {
956 result = dm_.GetTableInfo(dm_name, &targets);
957 }
958 if (!result) {
959 LOG(ERROR) << "Could not query device: " << dm_name;
960 return false;
961 }
962 if (targets.size() != 1) {
963 return false;
964 }
965
966 *target = std::move(targets[0]);
967 return true;
968 }
969
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)970 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
971 DeviceMapper::TargetInfo snap_target;
972 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
973 return false;
974 }
975 auto type = DeviceMapper::GetTargetType(snap_target.spec);
976
977 // If this is not a user-snapshot device then it should either
978 // be a dm-snapshot or dm-snapshot-merge target
979 if (type != "user") {
980 if (type != "snapshot" && type != "snapshot-merge") {
981 return false;
982 }
983 }
984
985 if (target) {
986 *target = std::move(snap_target);
987 }
988 return true;
989 }
990
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)991 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
992 DmTargetSnapshot::Status* status) {
993 DeviceMapper::TargetInfo target;
994 if (!IsSnapshotDevice(dm_name, &target)) {
995 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
996 return false;
997 }
998 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
999 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
1000 return false;
1001 }
1002 if (target_type) {
1003 *target_type = DeviceMapper::GetTargetType(target.spec);
1004 }
1005 if (!status->error.empty()) {
1006 LOG(ERROR) << "Snapshot: " << dm_name << " returned error code: " << status->error;
1007 return false;
1008 }
1009 return true;
1010 }
1011
1012 // Note that when a merge fails, we will *always* try again to complete the
1013 // merge each time the device boots. There is no harm in doing so, and if
1014 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)1015 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
1016 const std::function<bool()>& before_cancel) {
1017 while (true) {
1018 auto result = CheckMergeState(before_cancel);
1019 LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
1020
1021 if (result.state == UpdateState::MergeFailed) {
1022 AcknowledgeMergeFailure(result.failure_code);
1023 }
1024 if (result.state != UpdateState::Merging) {
1025 // Either there is no merge, or the merge was finished, so no need
1026 // to keep waiting.
1027 return result.state;
1028 }
1029
1030 if (callback && !callback()) {
1031 return result.state;
1032 }
1033
1034 // This wait is not super time sensitive, so we have a relatively
1035 // low polling frequency.
1036 std::this_thread::sleep_for(kUpdateStateCheckInterval);
1037 }
1038 }
1039
CheckMergeState(const std::function<bool ()> & before_cancel)1040 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
1041 auto lock = LockExclusive();
1042 if (!lock) {
1043 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
1044 }
1045
1046 auto result = CheckMergeState(lock.get(), before_cancel);
1047 LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
1048
1049 if (result.state == UpdateState::MergeCompleted) {
1050 // Do this inside the same lock. Failures get acknowledged without the
1051 // lock, because flock() might have failed.
1052 AcknowledgeMergeSuccess(lock.get());
1053 } else if (result.state == UpdateState::Cancelled) {
1054 if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
1055 LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
1056 }
1057 }
1058 return result;
1059 }
1060
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)1061 auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
1062 -> MergeResult {
1063 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1064 switch (update_status.state()) {
1065 case UpdateState::None:
1066 case UpdateState::MergeCompleted:
1067 // Harmless races are allowed between two callers of WaitForMerge,
1068 // so in both of these cases we just propagate the state.
1069 return MergeResult(update_status.state());
1070
1071 case UpdateState::Merging:
1072 case UpdateState::MergeNeedsReboot:
1073 case UpdateState::MergeFailed:
1074 // We'll poll each snapshot below. Note that for the NeedsReboot
1075 // case, we always poll once to give cleanup another opportunity to
1076 // run.
1077 break;
1078
1079 case UpdateState::Unverified:
1080 // This is an edge case. Normally cancelled updates are detected
1081 // via the merge poll below, but if we never started a merge, we
1082 // need to also check here.
1083 if (HandleCancelledUpdate(lock, before_cancel)) {
1084 return MergeResult(UpdateState::Cancelled);
1085 }
1086 return MergeResult(update_status.state());
1087
1088 default:
1089 return MergeResult(update_status.state());
1090 }
1091
1092 std::vector<std::string> snapshots;
1093 if (!ListSnapshots(lock, &snapshots)) {
1094 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
1095 }
1096
1097 auto other_suffix = device_->GetOtherSlotSuffix();
1098
1099 bool cancelled = false;
1100 bool merging = false;
1101 bool needs_reboot = false;
1102 bool wrong_phase = false;
1103 MergeFailureCode failure_code = MergeFailureCode::Ok;
1104 for (const auto& snapshot : snapshots) {
1105 if (android::base::EndsWith(snapshot, other_suffix)) {
1106 // This will have triggered an error message in InitiateMerge already.
1107 LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
1108 continue;
1109 }
1110
1111 auto result = CheckTargetMergeState(lock, snapshot, update_status);
1112 LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
1113
1114 switch (result.state) {
1115 case UpdateState::MergeFailed:
1116 // Take the first failure code in case other failures compound.
1117 if (failure_code == MergeFailureCode::Ok) {
1118 failure_code = result.failure_code;
1119 }
1120 break;
1121 case UpdateState::Merging:
1122 merging = true;
1123 break;
1124 case UpdateState::MergeNeedsReboot:
1125 needs_reboot = true;
1126 break;
1127 case UpdateState::MergeCompleted:
1128 break;
1129 case UpdateState::Cancelled:
1130 cancelled = true;
1131 break;
1132 case UpdateState::None:
1133 wrong_phase = true;
1134 break;
1135 default:
1136 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1137 << "\"" << result.state << "\"";
1138 if (failure_code == MergeFailureCode::Ok) {
1139 failure_code = MergeFailureCode::UnexpectedMergeState;
1140 }
1141 break;
1142 }
1143 }
1144
1145 if (merging) {
1146 // Note that we handle "Merging" before we handle anything else. We
1147 // want to poll until *nothing* is merging if we can, so everything has
1148 // a chance to get marked as completed or failed.
1149 return MergeResult(UpdateState::Merging);
1150 }
1151 if (failure_code != MergeFailureCode::Ok) {
1152 // Note: since there are many drop-out cases for failure, we acknowledge
1153 // it in WaitForMerge rather than here and elsewhere.
1154 return MergeResult(UpdateState::MergeFailed, failure_code);
1155 }
1156 if (wrong_phase) {
1157 // If we got here, no other partitions are being merged, and nothing
1158 // failed to merge. It's safe to move to the next merge phase.
1159 auto code = MergeSecondPhaseSnapshots(lock);
1160 if (code != MergeFailureCode::Ok) {
1161 return MergeResult(UpdateState::MergeFailed, code);
1162 }
1163 return MergeResult(UpdateState::Merging);
1164 }
1165 if (needs_reboot) {
1166 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1167 return MergeResult(UpdateState::MergeNeedsReboot);
1168 }
1169 if (cancelled) {
1170 // This is an edge case, that we handle as correctly as we sensibly can.
1171 // The underlying partition has changed behind update_engine, and we've
1172 // removed the snapshot as a result. The exact state of the update is
1173 // undefined now, but this can only happen on an unlocked device where
1174 // partitions can be flashed without wiping userdata.
1175 return MergeResult(UpdateState::Cancelled);
1176 }
1177 return MergeResult(UpdateState::MergeCompleted);
1178 }
1179
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1180 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1181 const SnapshotUpdateStatus& update_status)
1182 -> MergeResult {
1183 SnapshotStatus snapshot_status;
1184 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1185 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1186 }
1187
1188 std::unique_ptr<LpMetadata> current_metadata;
1189
1190 if (!IsSnapshotDevice(name)) {
1191 if (!current_metadata) {
1192 current_metadata = ReadCurrentMetadata();
1193 }
1194
1195 if (!current_metadata ||
1196 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1197 DeleteSnapshot(lock, name);
1198 return MergeResult(UpdateState::Cancelled);
1199 }
1200
1201 // During a check, we decided the merge was complete, but we were unable to
1202 // collapse the device-mapper stack and perform COW cleanup. If we haven't
1203 // rebooted after this check, the device will still be a snapshot-merge
1204 // target. If we have rebooted, the device will now be a linear target,
1205 // and we can try cleanup again.
1206 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1207 // NB: It's okay if this fails now, we gave cleanup our best effort.
1208 OnSnapshotMergeComplete(lock, name, snapshot_status);
1209 return MergeResult(UpdateState::MergeCompleted);
1210 }
1211
1212 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1213 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1214 }
1215
1216 // This check is expensive so it is only enabled for debugging.
1217 DCHECK((current_metadata = ReadCurrentMetadata()) &&
1218 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1219
1220 if (UpdateUsesUserSnapshots(lock)) {
1221 std::string merge_status;
1222 if (EnsureSnapuserdConnected()) {
1223 // Query the snapshot status from the daemon
1224 merge_status = snapuserd_client_->QuerySnapshotStatus(name);
1225 } else {
1226 MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1227 }
1228
1229 if (merge_status == "snapshot-merge-failed") {
1230 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1231 }
1232
1233 // This is the case when device reboots during merge. Once the device boots,
1234 // snapuserd daemon will not resume merge immediately in first stage init.
1235 // This is slightly different as compared to dm-snapshot-merge; In this
1236 // case, metadata file will have "MERGING" state whereas the daemon will be
1237 // waiting to resume the merge. Thus, we resume the merge at this point.
1238 if (merge_status == "snapshot" && snapshot_status.state() == SnapshotState::MERGING) {
1239 if (!snapuserd_client_->InitiateMerge(name)) {
1240 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1241 }
1242 return MergeResult(UpdateState::Merging);
1243 }
1244
1245 if (merge_status == "snapshot" &&
1246 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1247 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1248 // The snapshot is not being merged because it's in the wrong phase.
1249 return MergeResult(UpdateState::None);
1250 }
1251
1252 if (merge_status == "snapshot-merge") {
1253 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1254 LOG(ERROR) << "Snapshot " << name
1255 << " is merging after being marked merge-complete.";
1256 return MergeResult(UpdateState::MergeFailed,
1257 MergeFailureCode::UnmergedSectorsAfterCompletion);
1258 }
1259 return MergeResult(UpdateState::Merging);
1260 }
1261
1262 if (merge_status != "snapshot-merge-complete") {
1263 LOG(ERROR) << "Snapshot " << name << " has incorrect status: " << merge_status;
1264 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1265 }
1266 } else {
1267 // dm-snapshot in the kernel
1268 std::string target_type;
1269 DmTargetSnapshot::Status status;
1270 if (!QuerySnapshotStatus(name, &target_type, &status)) {
1271 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1272 }
1273 if (target_type == "snapshot" &&
1274 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1275 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1276 // The snapshot is not being merged because it's in the wrong phase.
1277 return MergeResult(UpdateState::None);
1278 }
1279 if (target_type != "snapshot-merge") {
1280 // We can get here if we failed to rewrite the target type in
1281 // InitiateMerge(). If we failed to create the target in first-stage
1282 // init, boot would not succeed.
1283 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1284 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1285 }
1286
1287 // These two values are equal when merging is complete.
1288 if (status.sectors_allocated != status.metadata_sectors) {
1289 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1290 LOG(ERROR) << "Snapshot " << name
1291 << " is merging after being marked merge-complete.";
1292 return MergeResult(UpdateState::MergeFailed,
1293 MergeFailureCode::UnmergedSectorsAfterCompletion);
1294 }
1295 return MergeResult(UpdateState::Merging);
1296 }
1297 }
1298
1299 // Merge is complete at this point
1300
1301 auto code = CheckMergeConsistency(lock, name, snapshot_status);
1302 if (code != MergeFailureCode::Ok) {
1303 return MergeResult(UpdateState::MergeFailed, code);
1304 }
1305
1306 // Merging is done. First, update the status file to indicate the merge
1307 // is complete. We do this before calling OnSnapshotMergeComplete, even
1308 // though this means the write is potentially wasted work (since in the
1309 // ideal case we'll immediately delete the file).
1310 //
1311 // This makes it simpler to reason about the next reboot: no matter what
1312 // part of cleanup failed, first-stage init won't try to create another
1313 // snapshot device for this partition.
1314 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1315 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1316 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1317 }
1318 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1319 return MergeResult(UpdateState::MergeNeedsReboot);
1320 }
1321 return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1322 }
1323
1324 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1325 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1326 const SnapshotStatus& status) {
1327 // If no partition was created (the COW exists entirely on /data), the
1328 // device-mapper layering is different than if we had a partition.
1329 if (status.cow_partition_size() == 0) {
1330 return GetCowImageDeviceName(snapshot);
1331 }
1332 return GetCowName(snapshot);
1333 }
1334
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1335 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1336 const SnapshotStatus& status) {
1337 CHECK(lock);
1338
1339 return merge_consistency_checker_(name, status);
1340 }
1341
CheckMergeConsistency(const std::string & name,const SnapshotStatus & status)1342 MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) {
1343 if (!status.compression_enabled()) {
1344 // Do not try to verify old-style COWs yet.
1345 return MergeFailureCode::Ok;
1346 }
1347
1348 auto& dm = DeviceMapper::Instance();
1349
1350 std::string cow_image_name = GetMappedCowDeviceName(name, status);
1351 std::string cow_image_path;
1352 if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1353 LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1354 return MergeFailureCode::GetCowPathConsistencyCheck;
1355 }
1356
1357 // First pass, count # of ops.
1358 size_t num_ops = 0;
1359 {
1360 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1361 if (fd < 0) {
1362 PLOG(ERROR) << "Failed to open " << cow_image_name;
1363 return MergeFailureCode::OpenCowConsistencyCheck;
1364 }
1365
1366 CowReader reader;
1367 if (!reader.Parse(std::move(fd))) {
1368 LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1369 return MergeFailureCode::ParseCowConsistencyCheck;
1370 }
1371
1372 num_ops = reader.get_num_total_data_ops();
1373 }
1374
1375 // Second pass, try as hard as we can to get the actual number of blocks
1376 // the system thinks is merged.
1377 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1378 if (fd < 0) {
1379 PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1380 return MergeFailureCode::OpenCowDirectConsistencyCheck;
1381 }
1382
1383 void* addr;
1384 size_t page_size = getpagesize();
1385 if (posix_memalign(&addr, page_size, page_size) < 0) {
1386 PLOG(ERROR) << "posix_memalign with page size " << page_size;
1387 return MergeFailureCode::MemAlignConsistencyCheck;
1388 }
1389
1390 // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1391 std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1392 if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1393 PLOG(ERROR) << "Direct read failed " << cow_image_name;
1394 return MergeFailureCode::DirectReadConsistencyCheck;
1395 }
1396
1397 auto header = reinterpret_cast<CowHeader*>(buffer.get());
1398 if (header->num_merge_ops != num_ops) {
1399 LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1400 << "but " << header->num_merge_ops << " were actually recorded.";
1401 LOG(ERROR) << "Aborting merge progress for snapshot " << name
1402 << ", will try again next boot";
1403 return MergeFailureCode::WrongMergeCountConsistencyCheck;
1404 }
1405
1406 return MergeFailureCode::Ok;
1407 }
1408
MergeSecondPhaseSnapshots(LockedFile * lock)1409 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1410 std::vector<std::string> snapshots;
1411 if (!ListSnapshots(lock, &snapshots)) {
1412 return MergeFailureCode::ListSnapshots;
1413 }
1414
1415 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1416 CHECK(update_status.state() == UpdateState::Merging ||
1417 update_status.state() == UpdateState::MergeFailed);
1418 CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1419
1420 update_status.set_state(UpdateState::Merging);
1421 update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1422 if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1423 return MergeFailureCode::WriteStatus;
1424 }
1425
1426 MergeFailureCode result = MergeFailureCode::Ok;
1427 for (const auto& snapshot : snapshots) {
1428 SnapshotStatus snapshot_status;
1429 if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1430 return MergeFailureCode::ReadStatus;
1431 }
1432 if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1433 continue;
1434 }
1435 auto code = SwitchSnapshotToMerge(lock, snapshot);
1436 if (code != MergeFailureCode::Ok) {
1437 LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1438 if (result == MergeFailureCode::Ok) {
1439 result = code;
1440 }
1441 }
1442 }
1443 return result;
1444 }
1445
GetSnapshotBootIndicatorPath()1446 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1447 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1448 }
1449
GetRollbackIndicatorPath()1450 std::string SnapshotManager::GetRollbackIndicatorPath() {
1451 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1452 }
1453
GetForwardMergeIndicatorPath()1454 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1455 return metadata_dir_ + "/allow-forward-merge";
1456 }
1457
GetOldPartitionMetadataPath()1458 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1459 return metadata_dir_ + "/old-partition-metadata";
1460 }
1461
AcknowledgeMergeSuccess(LockedFile * lock)1462 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1463 // It's not possible to remove update state in recovery, so write an
1464 // indicator that cleanup is needed on reboot. If a factory data reset
1465 // was requested, it doesn't matter, everything will get wiped anyway.
1466 // To make testing easier we consider a /data wipe as cleaned up.
1467 if (device_->IsRecovery()) {
1468 WriteUpdateState(lock, UpdateState::MergeCompleted);
1469 return;
1470 }
1471
1472 RemoveAllUpdateState(lock);
1473
1474 if (UpdateUsesUserSnapshots(lock) && !device()->IsTestDevice()) {
1475 if (snapuserd_client_) {
1476 snapuserd_client_->DetachSnapuserd();
1477 snapuserd_client_->CloseConnection();
1478 snapuserd_client_ = nullptr;
1479 }
1480 }
1481 }
1482
AcknowledgeMergeFailure(MergeFailureCode failure_code)1483 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1484 // Log first, so worst case, we always have a record of why the calls below
1485 // were being made.
1486 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1487
1488 auto lock = LockExclusive();
1489 if (!lock) return;
1490
1491 // Since we released the lock in between WaitForMerge and here, it's
1492 // possible (1) the merge successfully completed or (2) was already
1493 // marked as a failure. So make sure to check the state again, and
1494 // only mark as a failure if appropriate.
1495 UpdateState state = ReadUpdateState(lock.get());
1496 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1497 return;
1498 }
1499
1500 WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1501 }
1502
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1503 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1504 const SnapshotStatus& status) {
1505 if (!UpdateUsesUserSnapshots(lock)) {
1506 if (IsSnapshotDevice(name)) {
1507 // We are extra-cautious here, to avoid deleting the wrong table.
1508 std::string target_type;
1509 DmTargetSnapshot::Status dm_status;
1510 if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1511 return false;
1512 }
1513 if (target_type != "snapshot-merge") {
1514 LOG(ERROR) << "Unexpected target type " << target_type
1515 << " for snapshot device: " << name;
1516 return false;
1517 }
1518 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1519 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1520 return false;
1521 }
1522 if (!CollapseSnapshotDevice(lock, name, status)) {
1523 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1524 return false;
1525 }
1526 }
1527 } else {
1528 // Just collapse the device - no need to query again as we just did
1529 // prior to calling this function
1530 if (!CollapseSnapshotDevice(lock, name, status)) {
1531 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1532 return false;
1533 }
1534 }
1535
1536 // Note that collapsing is implicitly an Unmap, so we don't need to
1537 // unmap the snapshot.
1538
1539 if (!DeleteSnapshot(lock, name)) {
1540 LOG(ERROR) << "Could not delete snapshot: " << name;
1541 return false;
1542 }
1543 return true;
1544 }
1545
CollapseSnapshotDevice(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1546 bool SnapshotManager::CollapseSnapshotDevice(LockedFile* lock, const std::string& name,
1547 const SnapshotStatus& status) {
1548 if (!UpdateUsesUserSnapshots(lock)) {
1549 // Verify we have a snapshot-merge device.
1550 DeviceMapper::TargetInfo target;
1551 if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1552 return false;
1553 }
1554 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1555 // This should be impossible, it was checked earlier.
1556 LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1557 return false;
1558 }
1559
1560 std::string base_device, cow_device;
1561 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1562 LOG(ERROR) << "Could not parse snapshot device " << name
1563 << " parameters: " << target.data;
1564 return false;
1565 }
1566 }
1567
1568 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1569 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1570 LOG(ERROR) << "Snapshot " << name
1571 << " size is not sector aligned: " << status.snapshot_size();
1572 return false;
1573 }
1574
1575 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1576 // Create a DmTable that is identical to the base device.
1577 CreateLogicalPartitionParams base_device_params{
1578 .block_device = device_->GetSuperDevice(slot),
1579 .metadata_slot = slot,
1580 .partition_name = name,
1581 .partition_opener = &device_->GetPartitionOpener(),
1582 };
1583 DmTable table;
1584 if (!CreateDmTable(base_device_params, &table)) {
1585 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1586 return false;
1587 }
1588
1589 if (!dm_.LoadTableAndActivate(name, table)) {
1590 return false;
1591 }
1592
1593 if (!UpdateUsesUserSnapshots(lock)) {
1594 // Attempt to delete the snapshot device if one still exists. Nothing
1595 // should be depending on the device, and device-mapper should have
1596 // flushed remaining I/O. We could in theory replace with dm-zero (or
1597 // re-use the table above), but for now it's better to know why this
1598 // would fail.
1599 //
1600 // Furthermore, we should not be trying to unmap for userspace snapshot
1601 // as unmap will fail since dm-user itself was a snapshot device prior
1602 // to switching of tables. Unmap will fail as the device will be mounted
1603 // by system partitions
1604 if (status.compression_enabled()) {
1605 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
1606 UnmapDmUserDevice(dm_user_name);
1607 }
1608 }
1609
1610 // We can't delete base device immediately as daemon holds a reference.
1611 // Make sure we wait for all the worker threads to terminate and release
1612 // the reference
1613 if (UpdateUsesUserSnapshots(lock) && EnsureSnapuserdConnected()) {
1614 if (!snapuserd_client_->WaitForDeviceDelete(name)) {
1615 LOG(ERROR) << "Failed to wait for " << name << " control device to delete";
1616 }
1617 }
1618
1619 auto base_name = GetBaseDeviceName(name);
1620 if (!DeleteDeviceIfExists(base_name)) {
1621 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1622 }
1623
1624 if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1625 LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1626 }
1627
1628 return true;
1629 }
1630
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1631 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1632 const std::function<bool()>& before_cancel) {
1633 auto slot = GetCurrentSlot();
1634 if (slot == Slot::Unknown) {
1635 return false;
1636 }
1637
1638 // If all snapshots were reflashed, then cancel the entire update.
1639 if (AreAllSnapshotsCancelled(lock)) {
1640 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1641 return RemoveAllUpdateState(lock, before_cancel);
1642 }
1643
1644 // If update has been rolled back, then cancel the entire update.
1645 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1646 // when ProcessUpdateState() returns UpdateState::Cancelled.
1647 auto current_slot = GetCurrentSlot();
1648 if (current_slot != Slot::Source) {
1649 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1650 << " slot, taking no action.";
1651 return false;
1652 }
1653
1654 // current_slot == Source. Attempt to detect rollbacks.
1655 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1656 // This unverified update is not attempted. Take no action.
1657 PLOG(INFO) << "Rollback indicator not detected. "
1658 << "Update state is being processed before reboot, taking no action.";
1659 return false;
1660 }
1661
1662 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1663 return RemoveAllUpdateState(lock, before_cancel);
1664 }
1665
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1666 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1667 std::vector<std::string>* snapuserd_argv) {
1668 LOG(INFO) << "Performing transition for snapuserd.";
1669
1670 // Don't use EnsureSnapuserdConnected() because this is called from init,
1671 // and attempting to do so will deadlock.
1672 if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1673 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1674 if (!snapuserd_client_) {
1675 LOG(ERROR) << "Unable to connect to snapuserd";
1676 return false;
1677 }
1678 }
1679
1680 auto lock = LockExclusive();
1681 if (!lock) return false;
1682
1683 std::vector<std::string> snapshots;
1684 if (!ListSnapshots(lock.get(), &snapshots)) {
1685 LOG(ERROR) << "Failed to list snapshots.";
1686 return false;
1687 }
1688
1689 if (UpdateUsesUserSnapshots(lock.get()) && transition == InitTransition::SELINUX_DETACH) {
1690 snapuserd_argv->emplace_back("-user_snapshot");
1691 if (UpdateUsesIouring(lock.get())) {
1692 snapuserd_argv->emplace_back("-io_uring");
1693 }
1694 }
1695
1696 size_t num_cows = 0;
1697 size_t ok_cows = 0;
1698 for (const auto& snapshot : snapshots) {
1699 std::string user_cow_name = GetDmUserCowName(snapshot, GetSnapshotDriver(lock.get()));
1700
1701 if (dm_.GetState(user_cow_name) == DmDeviceState::INVALID) {
1702 continue;
1703 }
1704
1705 DeviceMapper::TargetInfo target;
1706 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1707 continue;
1708 }
1709
1710 auto target_type = DeviceMapper::GetTargetType(target.spec);
1711 if (target_type != "user") {
1712 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1713 continue;
1714 }
1715
1716 num_cows++;
1717
1718 SnapshotStatus snapshot_status;
1719 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1720 LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1721 continue;
1722 }
1723
1724 auto misc_name = user_cow_name;
1725
1726 DmTable table;
1727 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1728 if (!dm_.LoadTableAndActivate(user_cow_name, table)) {
1729 LOG(ERROR) << "Unable to swap tables for " << misc_name;
1730 continue;
1731 }
1732
1733 std::string source_device_name;
1734 if (snapshot_status.old_partition_size() > 0) {
1735 source_device_name = GetSourceDeviceName(snapshot);
1736 } else {
1737 source_device_name = GetBaseDeviceName(snapshot);
1738 }
1739
1740 std::string source_device;
1741 if (!dm_.GetDmDevicePathByName(source_device_name, &source_device)) {
1742 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1743 continue;
1744 }
1745
1746 std::string base_path_merge;
1747 if (!dm_.GetDmDevicePathByName(GetBaseDeviceName(snapshot), &base_path_merge)) {
1748 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1749 continue;
1750 }
1751
1752 std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1753
1754 std::string cow_image_device;
1755 if (!dm_.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1756 LOG(ERROR) << "Could not get device path for " << cow_image_name;
1757 continue;
1758 }
1759
1760 // Wait for ueventd to acknowledge and create the control device node.
1761 std::string control_device = "/dev/dm-user/" + misc_name;
1762 if (!WaitForDevice(control_device, 10s)) {
1763 LOG(ERROR) << "dm-user control device no found: " << misc_name;
1764 continue;
1765 }
1766
1767 if (transition == InitTransition::SELINUX_DETACH) {
1768 if (!UpdateUsesUserSnapshots(lock.get())) {
1769 auto message = misc_name + "," + cow_image_device + "," + source_device;
1770 snapuserd_argv->emplace_back(std::move(message));
1771 } else {
1772 auto message = misc_name + "," + cow_image_device + "," + source_device + "," +
1773 base_path_merge;
1774 snapuserd_argv->emplace_back(std::move(message));
1775 }
1776
1777 // Do not attempt to connect to the new snapuserd yet, it hasn't
1778 // been started. We do however want to wait for the misc device
1779 // to have been created.
1780 ok_cows++;
1781 continue;
1782 }
1783
1784 uint64_t base_sectors;
1785 if (!UpdateUsesUserSnapshots(lock.get())) {
1786 base_sectors =
1787 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1788 } else {
1789 base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_image_device,
1790 source_device, base_path_merge);
1791 }
1792
1793 if (base_sectors == 0) {
1794 // Unrecoverable as metadata reads from cow device failed
1795 LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1796 return false;
1797 }
1798
1799 CHECK(base_sectors <= target.spec.length);
1800
1801 if (!snapuserd_client_->AttachDmUser(misc_name)) {
1802 // This error is unrecoverable. We cannot proceed because reads to
1803 // the underlying device will fail.
1804 LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1805 return false;
1806 }
1807
1808 ok_cows++;
1809 }
1810
1811 if (ok_cows != num_cows) {
1812 LOG(ERROR) << "Could not transition all snapuserd consumers.";
1813 return false;
1814 }
1815 return true;
1816 }
1817
ReadCurrentMetadata()1818 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1819 const auto& opener = device_->GetPartitionOpener();
1820 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1821 auto super_device = device_->GetSuperDevice(slot);
1822 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1823 if (!metadata) {
1824 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1825 return nullptr;
1826 }
1827 return metadata;
1828 }
1829
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1830 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1831 const LpMetadata& metadata, const std::string& name) {
1832 auto partition = android::fs_mgr::FindPartition(metadata, name);
1833 if (!partition) return MetadataPartitionState::None;
1834 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1835 return MetadataPartitionState::Updated;
1836 }
1837 return MetadataPartitionState::Flashed;
1838 }
1839
AreAllSnapshotsCancelled(LockedFile * lock)1840 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1841 std::vector<std::string> snapshots;
1842 if (!ListSnapshots(lock, &snapshots)) {
1843 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1844 << "after applying an update. Assuming no snapshots.";
1845 // Let HandleCancelledUpdate resets UpdateState.
1846 return true;
1847 }
1848
1849 std::map<std::string, bool> flashing_status;
1850
1851 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1852 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1853 << "removing update states.";
1854 return false;
1855 }
1856
1857 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1858 [](const auto& pair) { return pair.second; });
1859
1860 if (all_snapshots_cancelled) {
1861 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1862 }
1863 return all_snapshots_cancelled;
1864 }
1865
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1866 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1867 const std::vector<std::string>& snapshots,
1868 std::map<std::string, bool>* out) {
1869 CHECK(lock);
1870
1871 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1872 if (source_slot_suffix.empty()) {
1873 return false;
1874 }
1875 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1876 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1877
1878 // Attempt to detect re-flashing on each partition.
1879 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1880 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1881 // deleted. Caller is responsible for merging the rest of the snapshots.
1882 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1883 //
1884 // Note that we use target slot metadata, since if an OTA has been applied
1885 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1886 // operation against dynamic partitions ensures that all copies of the
1887 // metadata are in sync, so flashing all partitions on the source slot will
1888 // remove the UPDATED flag on the target slot as well.
1889 const auto& opener = device_->GetPartitionOpener();
1890 auto super_device = device_->GetSuperDevice(target_slot);
1891 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1892 if (!metadata) {
1893 return false;
1894 }
1895
1896 for (const auto& snapshot_name : snapshots) {
1897 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1898 MetadataPartitionState::Updated) {
1899 out->emplace(snapshot_name, false);
1900 } else {
1901 // Delete snapshots for partitions that are re-flashed after the update.
1902 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1903 out->emplace(snapshot_name, true);
1904 }
1905 }
1906 return true;
1907 }
1908
RemoveInvalidSnapshots(LockedFile * lock)1909 void SnapshotManager::RemoveInvalidSnapshots(LockedFile* lock) {
1910 std::vector<std::string> snapshots;
1911
1912 // Remove the stale snapshot metadata
1913 //
1914 // We make sure that all the three cases
1915 // are valid before removing the snapshot metadata:
1916 //
1917 // 1: dm state is active
1918 // 2: Root fs is not mounted off as a snapshot device
1919 // 3: Snapshot slot suffix should match current device slot
1920 if (!ListSnapshots(lock, &snapshots, device_->GetSlotSuffix()) || snapshots.empty()) {
1921 return;
1922 }
1923
1924 // We indeed have some invalid snapshots
1925 for (const auto& name : snapshots) {
1926 if (dm_.GetState(name) == DmDeviceState::ACTIVE && !IsSnapshotDevice(name)) {
1927 if (!DeleteSnapshot(lock, name)) {
1928 LOG(ERROR) << "Failed to delete invalid snapshot: " << name;
1929 } else {
1930 LOG(INFO) << "Invalid snapshot: " << name << " deleted";
1931 }
1932 }
1933 }
1934 }
1935
RemoveAllSnapshots(LockedFile * lock)1936 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1937 std::vector<std::string> snapshots;
1938 if (!ListSnapshots(lock, &snapshots)) {
1939 LOG(ERROR) << "Could not list snapshots";
1940 return false;
1941 }
1942
1943 std::map<std::string, bool> flashing_status;
1944 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1945 LOG(WARNING) << "Failed to get flashing status";
1946 }
1947
1948 auto current_slot = GetCurrentSlot();
1949 bool ok = true;
1950 bool has_mapped_cow_images = false;
1951 for (const auto& name : snapshots) {
1952 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1953 // If boot indicator is missing, update state is None or Initiated, so
1954 // it is also okay to unmap and delete all the snapshots.
1955 // If booting off target slot,
1956 // - should not unmap because:
1957 // - In Android mode, snapshots are not mapped, but
1958 // filesystems are mounting off dm-linear targets directly.
1959 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1960 // - If partition is flashed or unknown, it is okay to delete snapshots.
1961 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1962 // as dm-snapshot (for example, after merge completes).
1963 bool should_unmap = current_slot != Slot::Target;
1964 bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1965 if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1966 // Something very unexpected has happened - we want to unmap this
1967 // snapshot, but it's on the wrong slot. We can't unmap an active
1968 // partition. If this is not really a snapshot, skip the unmap
1969 // step.
1970 if (dm_.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1971 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1972 << " for source partition; removing without unmap.";
1973 should_unmap = false;
1974 }
1975 }
1976
1977 bool partition_ok = true;
1978 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1979 partition_ok = false;
1980 }
1981 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1982 partition_ok = false;
1983 }
1984
1985 if (!partition_ok) {
1986 // Remember whether or not we were able to unmap the cow image.
1987 auto cow_image_device = GetCowImageDeviceName(name);
1988 has_mapped_cow_images |=
1989 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1990
1991 ok = false;
1992 }
1993 }
1994
1995 if (ok || !has_mapped_cow_images) {
1996 // Delete any image artifacts as a precaution, in case an update is
1997 // being cancelled due to some corrupted state in an lp_metadata file.
1998 // Note that we do not do this if some cow images are still mapped,
1999 // since we must not remove backing storage if it's in use.
2000 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
2001 LOG(ERROR) << "Could not remove all snapshot artifacts";
2002 return false;
2003 }
2004 }
2005 return ok;
2006 }
2007
2008 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)2009 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
2010 Slot current_slot, const std::string& name) {
2011 if (current_slot != Slot::Target) {
2012 return true;
2013 }
2014 auto it = flashing_status.find(name);
2015 if (it == flashing_status.end()) {
2016 LOG(WARNING) << "Can't determine flashing status for " << name;
2017 return true;
2018 }
2019 if (it->second) {
2020 // partition flashed, okay to delete obsolete snapshots
2021 return true;
2022 }
2023 return !IsSnapshotDevice(name);
2024 }
2025
GetUpdateState(double * progress)2026 UpdateState SnapshotManager::GetUpdateState(double* progress) {
2027 // If we've never started an update, the state file won't exist.
2028 auto state_file = GetStateFilePath();
2029 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2030 return UpdateState::None;
2031 }
2032
2033 auto lock = LockShared();
2034 if (!lock) {
2035 return UpdateState::None;
2036 }
2037
2038 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
2039 auto state = update_status.state();
2040 if (progress == nullptr) {
2041 return state;
2042 }
2043
2044 if (state == UpdateState::MergeCompleted) {
2045 *progress = 100.0;
2046 return state;
2047 }
2048
2049 *progress = 0.0;
2050 if (state != UpdateState::Merging) {
2051 return state;
2052 }
2053
2054 if (!UpdateUsesUserSnapshots(lock.get())) {
2055 // Sum all the snapshot states as if the system consists of a single huge
2056 // snapshots device, then compute the merge completion percentage of that
2057 // device.
2058 std::vector<std::string> snapshots;
2059 if (!ListSnapshots(lock.get(), &snapshots)) {
2060 LOG(ERROR) << "Could not list snapshots";
2061 return state;
2062 }
2063
2064 DmTargetSnapshot::Status fake_snapshots_status = {};
2065 for (const auto& snapshot : snapshots) {
2066 DmTargetSnapshot::Status current_status;
2067
2068 if (!IsSnapshotDevice(snapshot)) continue;
2069 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
2070
2071 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
2072 fake_snapshots_status.total_sectors += current_status.total_sectors;
2073 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
2074 }
2075
2076 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
2077 update_status.sectors_allocated());
2078 } else {
2079 if (EnsureSnapuserdConnected()) {
2080 *progress = snapuserd_client_->GetMergePercent();
2081 }
2082 }
2083
2084 return state;
2085 }
2086
UpdateUsesCompression()2087 bool SnapshotManager::UpdateUsesCompression() {
2088 auto lock = LockShared();
2089 if (!lock) return false;
2090 return UpdateUsesCompression(lock.get());
2091 }
2092
UpdateUsesCompression(LockedFile * lock)2093 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
2094 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2095 return update_status.compression_enabled();
2096 }
2097
UpdateUsesIouring(LockedFile * lock)2098 bool SnapshotManager::UpdateUsesIouring(LockedFile* lock) {
2099 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2100 return update_status.io_uring_enabled();
2101 }
2102
UpdateUsesUserSnapshots()2103 bool SnapshotManager::UpdateUsesUserSnapshots() {
2104 // This and the following function is constantly
2105 // invoked during snapshot merge. We want to avoid
2106 // constantly reading from disk. Hence, store this
2107 // value in memory.
2108 //
2109 // Furthermore, this value in the disk is set
2110 // only when OTA is applied and doesn't change
2111 // during merge phase. Hence, once we know that
2112 // the value is read from disk the very first time,
2113 // it is safe to read successive checks from memory.
2114 if (is_snapshot_userspace_.has_value()) {
2115 return is_snapshot_userspace_.value();
2116 }
2117
2118 auto lock = LockShared();
2119 if (!lock) return false;
2120
2121 return UpdateUsesUserSnapshots(lock.get());
2122 }
2123
UpdateUsesUserSnapshots(LockedFile * lock)2124 bool SnapshotManager::UpdateUsesUserSnapshots(LockedFile* lock) {
2125 // See UpdateUsesUserSnapshots()
2126 if (is_snapshot_userspace_.has_value()) {
2127 return is_snapshot_userspace_.value();
2128 }
2129
2130 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
2131 is_snapshot_userspace_ = update_status.userspace_snapshots();
2132 return is_snapshot_userspace_.value();
2133 }
2134
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)2135 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
2136 const std::string& suffix) {
2137 CHECK(lock);
2138
2139 auto dir_path = metadata_dir_ + "/snapshots"s;
2140 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
2141 if (!dir) {
2142 PLOG(ERROR) << "opendir failed: " << dir_path;
2143 return false;
2144 }
2145
2146 struct dirent* dp;
2147 while ((dp = readdir(dir.get())) != nullptr) {
2148 if (dp->d_type != DT_REG) continue;
2149
2150 std::string name(dp->d_name);
2151 if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
2152 continue;
2153 }
2154
2155 // Insert system and product partition at the beginning so that
2156 // during snapshot-merge, these partitions are merged first.
2157 if (name == "system_a" || name == "system_b" || name == "product_a" ||
2158 name == "product_b") {
2159 snapshots->insert(snapshots->begin(), std::move(name));
2160 } else {
2161 snapshots->emplace_back(std::move(name));
2162 }
2163 }
2164
2165 return true;
2166 }
2167
IsSnapshotManagerNeeded()2168 bool SnapshotManager::IsSnapshotManagerNeeded() {
2169 return access(kBootIndicatorPath, F_OK) == 0;
2170 }
2171
GetGlobalRollbackIndicatorPath()2172 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
2173 return kRollbackIndicatorPath;
2174 }
2175
NeedSnapshotsInFirstStageMount()2176 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
2177 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
2178 // will create devices that look like the old slot, except with extra
2179 // content at the end of each device. This will confuse dm-verity, and
2180 // ultimately we'll fail to boot. Why not make it a fatal error and have
2181 // the reason be clearer? Because the indicator file still exists, and
2182 // if this was FATAL, reverting to the old slot would be broken.
2183 auto slot = GetCurrentSlot();
2184
2185 if (slot != Slot::Target) {
2186 if (slot == Slot::Source) {
2187 // Device is rebooting into the original slot, so mark this as a
2188 // rollback.
2189 auto path = GetRollbackIndicatorPath();
2190 if (!android::base::WriteStringToFile("1", path)) {
2191 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
2192 } else {
2193 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
2194 }
2195 }
2196 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
2197 return false;
2198 }
2199
2200 // If we can't read the update state, it's unlikely anything else will
2201 // succeed, so this is a fatal error. We'll eventually exhaust boot
2202 // attempts and revert to the old slot.
2203 auto lock = LockShared();
2204 if (!lock) {
2205 LOG(FATAL) << "Could not read update state to determine snapshot status";
2206 return false;
2207 }
2208 switch (ReadUpdateState(lock.get())) {
2209 case UpdateState::Unverified:
2210 case UpdateState::Merging:
2211 case UpdateState::MergeFailed:
2212 return true;
2213 default:
2214 return false;
2215 }
2216 }
2217
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)2218 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
2219 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
2220 LOG(INFO) << "Creating logical partitions with snapshots as needed";
2221
2222 auto lock = LockExclusive();
2223 if (!lock) return false;
2224
2225 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2226 return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
2227 }
2228
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)2229 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
2230 uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
2231 const auto& opener = device_->GetPartitionOpener();
2232 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2233 if (!metadata) {
2234 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2235 return false;
2236 }
2237
2238 if (!EnsureImageManager()) {
2239 return false;
2240 }
2241
2242 for (const auto& partition : metadata->partitions) {
2243 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
2244 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
2245 << kCowGroupName;
2246 continue;
2247 }
2248
2249 CreateLogicalPartitionParams params = {
2250 .block_device = super_device,
2251 .metadata = metadata.get(),
2252 .partition = &partition,
2253 .partition_opener = &opener,
2254 .timeout_ms = timeout_ms,
2255 };
2256 if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
2257 return false;
2258 }
2259 }
2260
2261 LOG(INFO) << "Created logical partitions with snapshot.";
2262 return true;
2263 }
2264
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)2265 static std::chrono::milliseconds GetRemainingTime(
2266 const std::chrono::milliseconds& timeout,
2267 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
2268 // If no timeout is specified, execute all commands without specifying any timeout.
2269 if (timeout.count() == 0) return std::chrono::milliseconds(0);
2270 auto passed_time = std::chrono::steady_clock::now() - begin;
2271 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
2272 if (remaining_time.count() <= 0) {
2273 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
2274 << remaining_time.count() << "ms remaining)";
2275 // Return min() instead of remaining_time here because 0 is treated as a special value for
2276 // no timeout, where the rest of the commands will still be executed.
2277 return std::chrono::milliseconds::min();
2278 }
2279 return remaining_time;
2280 }
2281
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)2282 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
2283 CreateLogicalPartitionParams params,
2284 SnapshotContext context, SnapshotPaths* paths) {
2285 auto begin = std::chrono::steady_clock::now();
2286
2287 CHECK(lock);
2288
2289 if (params.GetPartitionName() != params.GetDeviceName()) {
2290 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
2291 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
2292 return false;
2293 }
2294
2295 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
2296 // reading super partition metadata).
2297 CreateLogicalPartitionParams::OwnedData params_owned_data;
2298 if (!params.InitDefaults(¶ms_owned_data)) {
2299 return false;
2300 }
2301
2302 if (!params.partition->num_extents) {
2303 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
2304 return true; // leave path empty to indicate that nothing is mapped.
2305 }
2306
2307 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
2308 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
2309 // completed, live_snapshot_status is set to nullopt.
2310 std::optional<SnapshotStatus> live_snapshot_status;
2311 do {
2312 if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
2313 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
2314 << params.GetPartitionName();
2315 break;
2316 }
2317 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
2318 if (access(file_path.c_str(), F_OK) != 0) {
2319 if (errno != ENOENT) {
2320 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2321 << ": Can't access " << file_path;
2322 return false;
2323 }
2324 break;
2325 }
2326 live_snapshot_status = std::make_optional<SnapshotStatus>();
2327 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2328 return false;
2329 }
2330 // No live snapshot if merge is completed.
2331 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2332 live_snapshot_status.reset();
2333 }
2334
2335 if (live_snapshot_status->state() == SnapshotState::NONE ||
2336 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2337 0) {
2338 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2339 << " is invalid, ignoring: state = "
2340 << SnapshotState_Name(live_snapshot_status->state())
2341 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2342 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2343 live_snapshot_status.reset();
2344 }
2345 } while (0);
2346
2347 if (live_snapshot_status.has_value()) {
2348 // dm-snapshot requires the base device to be writable.
2349 params.force_writable = true;
2350 // Map the base device with a different name to avoid collision.
2351 params.device_name = GetBaseDeviceName(params.GetPartitionName());
2352 }
2353
2354 AutoDeviceList created_devices;
2355
2356 // Create the base device for the snapshot, or if there is no snapshot, the
2357 // device itself. This device consists of the real blocks in the super
2358 // partition that this logical partition occupies.
2359 std::string base_path;
2360 if (!CreateLogicalPartition(params, &base_path)) {
2361 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2362 << " as device " << params.GetDeviceName();
2363 return false;
2364 }
2365 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, params.GetDeviceName());
2366
2367 if (paths) {
2368 paths->target_device = base_path;
2369 }
2370
2371 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2372 if (remaining_time.count() < 0) {
2373 return false;
2374 }
2375
2376 // Wait for the base device to appear
2377 if (!WaitForDevice(base_path, remaining_time)) {
2378 return false;
2379 }
2380
2381 if (!live_snapshot_status.has_value()) {
2382 created_devices.Release();
2383 return true;
2384 }
2385
2386 // We don't have ueventd in first-stage init, so use device major:minor
2387 // strings instead.
2388 std::string base_device;
2389 if (!dm_.GetDeviceString(params.GetDeviceName(), &base_device)) {
2390 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2391 return false;
2392 }
2393
2394 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2395 if (remaining_time.count() < 0) return false;
2396
2397 std::string cow_name;
2398 CreateLogicalPartitionParams cow_params = params;
2399 cow_params.timeout_ms = remaining_time;
2400 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2401 return false;
2402 }
2403 std::string cow_device;
2404 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2405 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2406 return false;
2407 }
2408 if (paths) {
2409 paths->cow_device_name = cow_name;
2410 }
2411
2412 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2413 if (remaining_time.count() < 0) return false;
2414
2415 if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2416 // Stop here, we can't run dm-user yet, the COW isn't built.
2417 created_devices.Release();
2418 return true;
2419 }
2420
2421 if (live_snapshot_status->compression_enabled()) {
2422 // Get the source device (eg the view of the partition from before it was resized).
2423 std::string source_device_path;
2424 if (live_snapshot_status->old_partition_size() > 0) {
2425 if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2426 &source_device_path)) {
2427 LOG(ERROR) << "Could not map source device for: " << cow_name;
2428 return false;
2429 }
2430
2431 auto source_device = GetSourceDeviceName(params.GetPartitionName());
2432 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, source_device);
2433 } else {
2434 source_device_path = base_path;
2435 }
2436
2437 if (!WaitForDevice(source_device_path, remaining_time)) {
2438 return false;
2439 }
2440
2441 std::string cow_path;
2442 if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2443 LOG(ERROR) << "Could not determine path for: " << cow_name;
2444 return false;
2445 }
2446 if (!WaitForDevice(cow_path, remaining_time)) {
2447 return false;
2448 }
2449
2450 auto name = GetDmUserCowName(params.GetPartitionName(), GetSnapshotDriver(lock));
2451
2452 std::string new_cow_device;
2453 if (!MapDmUserCow(lock, name, cow_path, source_device_path, base_path, remaining_time,
2454 &new_cow_device)) {
2455 LOG(ERROR) << "Could not map dm-user device for partition "
2456 << params.GetPartitionName();
2457 return false;
2458 }
2459 created_devices.EmplaceBack<AutoUnmapDevice>(&dm_, name);
2460
2461 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2462 if (remaining_time.count() < 0) return false;
2463
2464 cow_device = new_cow_device;
2465 }
2466
2467 // For userspace snapshots, dm-user block device itself will act as a
2468 // snapshot device. There is one subtle difference - MapSnapshot will create
2469 // either snapshot target or snapshot-merge target based on the underlying
2470 // state of the snapshot device. If snapshot-merge target is created, merge
2471 // will immediately start in the kernel.
2472 //
2473 // This is no longer true with respect to userspace snapshots. When dm-user
2474 // block device is created, we just have the snapshots ready but daemon in
2475 // the user-space will not start the merge. We have to explicitly inform the
2476 // daemon to resume the merge. Check ProcessUpdateState() call stack.
2477 if (!UpdateUsesUserSnapshots(lock)) {
2478 std::string path;
2479 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2480 &path)) {
2481 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2482 return false;
2483 }
2484 // No need to add params.GetPartitionName() to created_devices since it is immediately
2485 // released.
2486
2487 if (paths) {
2488 paths->snapshot_device = path;
2489 }
2490 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2491 } else {
2492 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at "
2493 << cow_device;
2494 }
2495
2496 created_devices.Release();
2497
2498 return true;
2499 }
2500
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2501 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2502 const std::string& target_partition_name) {
2503 CHECK(lock);
2504
2505 if (!UnmapSnapshot(lock, target_partition_name)) {
2506 return false;
2507 }
2508
2509 if (!UnmapCowDevices(lock, target_partition_name)) {
2510 return false;
2511 }
2512
2513 auto base_name = GetBaseDeviceName(target_partition_name);
2514 if (!DeleteDeviceIfExists(base_name)) {
2515 LOG(ERROR) << "Cannot delete base device: " << base_name;
2516 return false;
2517 }
2518
2519 auto source_name = GetSourceDeviceName(target_partition_name);
2520 if (!DeleteDeviceIfExists(source_name)) {
2521 LOG(ERROR) << "Cannot delete source device: " << source_name;
2522 return false;
2523 }
2524
2525 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2526
2527 return true;
2528 }
2529
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)2530 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
2531 const SnapshotStatus& snapshot_status,
2532 AutoDeviceList* created_devices, std::string* cow_name) {
2533 CHECK(lock);
2534 CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
2535 auto begin = std::chrono::steady_clock::now();
2536
2537 std::string partition_name = params.GetPartitionName();
2538 std::string cow_image_name = GetCowImageDeviceName(partition_name);
2539 *cow_name = GetCowName(partition_name);
2540
2541 // Map COW image if necessary.
2542 if (snapshot_status.cow_file_size() > 0) {
2543 if (!EnsureImageManager()) return false;
2544 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2545 if (remaining_time.count() < 0) return false;
2546
2547 if (!MapCowImage(partition_name, remaining_time).has_value()) {
2548 LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
2549 return false;
2550 }
2551 created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
2552
2553 // If no COW partition exists, just return the image alone.
2554 if (snapshot_status.cow_partition_size() == 0) {
2555 *cow_name = std::move(cow_image_name);
2556 LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
2557 return true;
2558 }
2559 }
2560
2561 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2562 if (remaining_time.count() < 0) return false;
2563
2564 CHECK(snapshot_status.cow_partition_size() > 0);
2565
2566 // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
2567 // COW image device as the last extent.
2568 CreateLogicalPartitionParams cow_partition_params = params;
2569 cow_partition_params.partition = nullptr;
2570 cow_partition_params.partition_name = *cow_name;
2571 cow_partition_params.device_name.clear();
2572 DmTable table;
2573 if (!CreateDmTable(cow_partition_params, &table)) {
2574 return false;
2575 }
2576 // If the COW image exists, append it as the last extent.
2577 if (snapshot_status.cow_file_size() > 0) {
2578 std::string cow_image_device;
2579 if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
2580 LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
2581 return false;
2582 }
2583 auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
2584 auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
2585 table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
2586 0);
2587 }
2588
2589 // We have created the DmTable now. Map it.
2590 std::string cow_path;
2591 if (!dm_.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
2592 LOG(ERROR) << "Could not create COW device: " << *cow_name;
2593 return false;
2594 }
2595 created_devices->EmplaceBack<AutoUnmapDevice>(&dm_, *cow_name);
2596 LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
2597 return true;
2598 }
2599
UnmapCowDevices(LockedFile * lock,const std::string & name)2600 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2601 CHECK(lock);
2602 if (!EnsureImageManager()) return false;
2603
2604 if (UpdateUsesCompression(lock) && !UpdateUsesUserSnapshots(lock)) {
2605 auto dm_user_name = GetDmUserCowName(name, GetSnapshotDriver(lock));
2606 if (!UnmapDmUserDevice(dm_user_name)) {
2607 return false;
2608 }
2609 }
2610
2611 if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2612 LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2613 return false;
2614 }
2615
2616 std::string cow_image_name = GetCowImageDeviceName(name);
2617 if (!images_->UnmapImageIfExists(cow_image_name)) {
2618 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2619 return false;
2620 }
2621 return true;
2622 }
2623
UnmapDmUserDevice(const std::string & dm_user_name)2624 bool SnapshotManager::UnmapDmUserDevice(const std::string& dm_user_name) {
2625 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2626 return true;
2627 }
2628
2629 if (!DeleteDeviceIfExists(dm_user_name)) {
2630 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2631 return false;
2632 }
2633
2634 if (EnsureSnapuserdConnected()) {
2635 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2636 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2637 return false;
2638 }
2639 }
2640
2641 // Ensure the control device is gone so we don't run into ABA problems.
2642 auto control_device = "/dev/dm-user/" + dm_user_name;
2643 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2644 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2645 return false;
2646 }
2647 return true;
2648 }
2649
UnmapUserspaceSnapshotDevice(LockedFile * lock,const std::string & snapshot_name)2650 bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
2651 const std::string& snapshot_name) {
2652 auto dm_user_name = GetDmUserCowName(snapshot_name, GetSnapshotDriver(lock));
2653 if (dm_.GetState(dm_user_name) == DmDeviceState::INVALID) {
2654 return true;
2655 }
2656
2657 CHECK(lock);
2658
2659 SnapshotStatus snapshot_status;
2660
2661 if (!ReadSnapshotStatus(lock, snapshot_name, &snapshot_status)) {
2662 return false;
2663 }
2664 // If the merge is complete, then we switch dm tables which is equivalent
2665 // to unmap; hence, we can't be deleting the device
2666 // as the table would be mounted off partitions and will fail.
2667 if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
2668 if (!DeleteDeviceIfExists(dm_user_name)) {
2669 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2670 return false;
2671 }
2672 }
2673
2674 if (EnsureSnapuserdConnected()) {
2675 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2676 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2677 return false;
2678 }
2679 }
2680
2681 // Ensure the control device is gone so we don't run into ABA problems.
2682 auto control_device = "/dev/dm-user/" + dm_user_name;
2683 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2684 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2685 return false;
2686 }
2687 return true;
2688 }
2689
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2690 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2691 auto lock = LockExclusive();
2692 if (!lock) return false;
2693
2694 auto state = ReadUpdateState(lock.get());
2695 if (state == UpdateState::Unverified) {
2696 if (GetCurrentSlot() == Slot::Target) {
2697 LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2698 return false;
2699 }
2700 } else if (state != UpdateState::Initiated) {
2701 LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2702 return false;
2703 }
2704
2705 std::vector<std::string> snapshots;
2706 if (!ListSnapshots(lock.get(), &snapshots)) {
2707 return false;
2708 }
2709
2710 const auto& opener = device_->GetPartitionOpener();
2711 auto slot_suffix = device_->GetOtherSlotSuffix();
2712 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2713 auto super_device = device_->GetSuperDevice(slot_number);
2714 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2715 if (!metadata) {
2716 LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2717 << super_device;
2718 return false;
2719 }
2720
2721 for (const auto& snapshot : snapshots) {
2722 if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2723 LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2724 return false;
2725 }
2726
2727 CreateLogicalPartitionParams params = {
2728 .block_device = super_device,
2729 .metadata = metadata.get(),
2730 .partition_name = snapshot,
2731 .partition_opener = &opener,
2732 .timeout_ms = timeout_ms,
2733 };
2734 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2735 nullptr)) {
2736 LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2737 return false;
2738 }
2739 }
2740
2741 LOG(INFO) << "MapAllSnapshots succeeded.";
2742 return true;
2743 }
2744
UnmapAllSnapshots()2745 bool SnapshotManager::UnmapAllSnapshots() {
2746 auto lock = LockExclusive();
2747 if (!lock) return false;
2748
2749 return UnmapAllSnapshots(lock.get());
2750 }
2751
UnmapAllSnapshots(LockedFile * lock)2752 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2753 std::vector<std::string> snapshots;
2754 if (!ListSnapshots(lock, &snapshots)) {
2755 return false;
2756 }
2757
2758 for (const auto& snapshot : snapshots) {
2759 if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2760 LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2761 return false;
2762 }
2763 }
2764
2765 // Terminate the daemon and release the snapuserd_client_ object.
2766 // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2767 // will re-create the object and establish the socket connection.
2768 if (snapuserd_client_) {
2769 LOG(INFO) << "Shutdown snapuserd daemon";
2770 snapuserd_client_->DetachSnapuserd();
2771 snapuserd_client_->CloseConnection();
2772 snapuserd_client_ = nullptr;
2773 }
2774
2775 return true;
2776 }
2777
OpenFile(const std::string & file,int lock_flags)2778 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2779 -> std::unique_ptr<LockedFile> {
2780 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2781 if (fd < 0) {
2782 PLOG(ERROR) << "Open failed: " << file;
2783 return nullptr;
2784 }
2785 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2786 PLOG(ERROR) << "Acquire flock failed: " << file;
2787 return nullptr;
2788 }
2789 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2790 // calls, so strip extra flags.
2791 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2792 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2793 }
2794
~LockedFile()2795 SnapshotManager::LockedFile::~LockedFile() {
2796 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2797 PLOG(ERROR) << "Failed to unlock file: " << path_;
2798 }
2799 }
2800
GetStateFilePath() const2801 std::string SnapshotManager::GetStateFilePath() const {
2802 return metadata_dir_ + "/state"s;
2803 }
2804
GetMergeStateFilePath() const2805 std::string SnapshotManager::GetMergeStateFilePath() const {
2806 return metadata_dir_ + "/merge_state"s;
2807 }
2808
GetLockPath() const2809 std::string SnapshotManager::GetLockPath() const {
2810 return metadata_dir_;
2811 }
2812
OpenLock(int lock_flags)2813 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2814 auto lock_file = GetLockPath();
2815 return OpenFile(lock_file, lock_flags);
2816 }
2817
LockShared()2818 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2819 return OpenLock(LOCK_SH);
2820 }
2821
LockExclusive()2822 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2823 return OpenLock(LOCK_EX);
2824 }
2825
UpdateStateFromString(const std::string & contents)2826 static UpdateState UpdateStateFromString(const std::string& contents) {
2827 if (contents.empty() || contents == "none") {
2828 return UpdateState::None;
2829 } else if (contents == "initiated") {
2830 return UpdateState::Initiated;
2831 } else if (contents == "unverified") {
2832 return UpdateState::Unverified;
2833 } else if (contents == "merging") {
2834 return UpdateState::Merging;
2835 } else if (contents == "merge-completed") {
2836 return UpdateState::MergeCompleted;
2837 } else if (contents == "merge-needs-reboot") {
2838 return UpdateState::MergeNeedsReboot;
2839 } else if (contents == "merge-failed") {
2840 return UpdateState::MergeFailed;
2841 } else if (contents == "cancelled") {
2842 return UpdateState::Cancelled;
2843 } else {
2844 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2845 return UpdateState::None;
2846 }
2847 }
2848
operator <<(std::ostream & os,UpdateState state)2849 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2850 switch (state) {
2851 case UpdateState::None:
2852 return os << "none";
2853 case UpdateState::Initiated:
2854 return os << "initiated";
2855 case UpdateState::Unverified:
2856 return os << "unverified";
2857 case UpdateState::Merging:
2858 return os << "merging";
2859 case UpdateState::MergeCompleted:
2860 return os << "merge-completed";
2861 case UpdateState::MergeNeedsReboot:
2862 return os << "merge-needs-reboot";
2863 case UpdateState::MergeFailed:
2864 return os << "merge-failed";
2865 case UpdateState::Cancelled:
2866 return os << "cancelled";
2867 default:
2868 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2869 return os;
2870 }
2871 }
2872
ReadUpdateState(LockedFile * lock)2873 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2874 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2875 return status.state();
2876 }
2877
ReadSnapshotUpdateStatus(LockedFile * lock)2878 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2879 CHECK(lock);
2880
2881 SnapshotUpdateStatus status = {};
2882 std::string contents;
2883 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2884 PLOG(ERROR) << "Read state file failed";
2885 status.set_state(UpdateState::None);
2886 return status;
2887 }
2888
2889 if (!status.ParseFromString(contents)) {
2890 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2891
2892 // Try to rollback to legacy file to support devices that are
2893 // currently using the old file format.
2894 // TODO(b/147409432)
2895 status.set_state(UpdateStateFromString(contents));
2896 }
2897
2898 return status;
2899 }
2900
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2901 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2902 MergeFailureCode failure_code) {
2903 SnapshotUpdateStatus status;
2904 status.set_state(state);
2905
2906 switch (state) {
2907 case UpdateState::MergeFailed:
2908 status.set_merge_failure_code(failure_code);
2909 break;
2910 case UpdateState::Initiated:
2911 status.set_source_build_fingerprint(
2912 android::base::GetProperty("ro.build.fingerprint", ""));
2913 break;
2914 default:
2915 break;
2916 }
2917
2918 // If we're transitioning between two valid states (eg, we're not beginning
2919 // or ending an OTA), then make sure to propagate the compression bit and
2920 // build fingerprint.
2921 if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2922 SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2923 status.set_compression_enabled(old_status.compression_enabled());
2924 status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2925 status.set_merge_phase(old_status.merge_phase());
2926 status.set_userspace_snapshots(old_status.userspace_snapshots());
2927 status.set_io_uring_enabled(old_status.io_uring_enabled());
2928 }
2929 return WriteSnapshotUpdateStatus(lock, status);
2930 }
2931
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2932 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2933 const SnapshotUpdateStatus& status) {
2934 CHECK(lock);
2935 CHECK(lock->lock_mode() == LOCK_EX);
2936
2937 std::string contents;
2938 if (!status.SerializeToString(&contents)) {
2939 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2940 return false;
2941 }
2942
2943 #ifdef LIBSNAPSHOT_USE_HAL
2944 auto merge_status = MergeStatus::UNKNOWN;
2945 switch (status.state()) {
2946 // The needs-reboot and completed cases imply that /data and /metadata
2947 // can be safely wiped, so we don't report a merge status.
2948 case UpdateState::None:
2949 case UpdateState::MergeNeedsReboot:
2950 case UpdateState::MergeCompleted:
2951 case UpdateState::Initiated:
2952 merge_status = MergeStatus::NONE;
2953 break;
2954 case UpdateState::Unverified:
2955 merge_status = MergeStatus::SNAPSHOTTED;
2956 break;
2957 case UpdateState::Merging:
2958 case UpdateState::MergeFailed:
2959 merge_status = MergeStatus::MERGING;
2960 break;
2961 default:
2962 // Note that Cancelled flows to here - it is never written, since
2963 // it only communicates a transient state to the caller.
2964 LOG(ERROR) << "Unexpected update status: " << status.state();
2965 break;
2966 }
2967
2968 bool set_before_write =
2969 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2970 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2971 return false;
2972 }
2973 #endif
2974
2975 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2976 PLOG(ERROR) << "Could not write to state file";
2977 return false;
2978 }
2979
2980 #ifdef LIBSNAPSHOT_USE_HAL
2981 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2982 return false;
2983 }
2984 #endif
2985 return true;
2986 }
2987
GetSnapshotStatusFilePath(const std::string & name)2988 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2989 auto file = metadata_dir_ + "/snapshots/"s + name;
2990 return file;
2991 }
2992
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2993 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2994 SnapshotStatus* status) {
2995 CHECK(lock);
2996 auto path = GetSnapshotStatusFilePath(name);
2997
2998 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2999 if (fd < 0) {
3000 PLOG(ERROR) << "Open failed: " << path;
3001 return false;
3002 }
3003
3004 if (!status->ParseFromFileDescriptor(fd.get())) {
3005 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
3006 return false;
3007 }
3008
3009 if (status->name() != name) {
3010 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
3011 status->set_name(name);
3012 }
3013
3014 return true;
3015 }
3016
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)3017 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
3018 // The caller must take an exclusive lock to modify snapshots.
3019 CHECK(lock);
3020 CHECK(lock->lock_mode() == LOCK_EX);
3021 CHECK(!status.name().empty());
3022
3023 auto path = GetSnapshotStatusFilePath(status.name());
3024
3025 std::string content;
3026 if (!status.SerializeToString(&content)) {
3027 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
3028 return false;
3029 }
3030
3031 if (!WriteStringToFileAtomic(content, path)) {
3032 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
3033 return false;
3034 }
3035
3036 return true;
3037 }
3038
EnsureImageManager()3039 bool SnapshotManager::EnsureImageManager() {
3040 if (images_) return true;
3041
3042 images_ = device_->OpenImageManager();
3043 if (!images_) {
3044 LOG(ERROR) << "Could not open ImageManager";
3045 return false;
3046 }
3047 return true;
3048 }
3049
EnsureSnapuserdConnected()3050 bool SnapshotManager::EnsureSnapuserdConnected() {
3051 if (snapuserd_client_) {
3052 return true;
3053 }
3054
3055 if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
3056 return false;
3057 }
3058
3059 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
3060 if (!snapuserd_client_) {
3061 LOG(ERROR) << "Unable to connect to snapuserd";
3062 return false;
3063 }
3064 return true;
3065 }
3066
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)3067 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
3068 std::vector<std::string> to_delete;
3069 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
3070 if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
3071 LOG(WARNING) << existing_cow_partition->name()
3072 << " cannot be unmapped and its space cannot be reclaimed";
3073 continue;
3074 }
3075 to_delete.push_back(existing_cow_partition->name());
3076 }
3077 for (const auto& name : to_delete) {
3078 current_metadata->RemovePartition(name);
3079 }
3080 }
3081
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3082 static Return AddRequiredSpace(Return orig,
3083 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3084 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
3085 return orig;
3086 }
3087 uint64_t sum = 0;
3088 for (auto&& [name, status] : all_snapshot_status) {
3089 sum += status.cow_file_size();
3090 }
3091 return Return::NoSpace(sum);
3092 }
3093
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)3094 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
3095 auto lock = LockExclusive();
3096 if (!lock) return Return::Error();
3097
3098 auto update_state = ReadUpdateState(lock.get());
3099 if (update_state != UpdateState::Initiated) {
3100 LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
3101 return Return::Error();
3102 }
3103
3104 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
3105 // partition takes up a big chunk of space in super, causing COW images to be created on
3106 // retrofit Virtual A/B devices.
3107 if (device_->IsOverlayfsSetup()) {
3108 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
3109 << ", reboot, then try again.";
3110 return Return::Error();
3111 }
3112
3113 const auto& opener = device_->GetPartitionOpener();
3114 auto current_suffix = device_->GetSlotSuffix();
3115 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
3116 auto target_suffix = device_->GetOtherSlotSuffix();
3117 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
3118 auto current_super = device_->GetSuperDevice(current_slot);
3119
3120 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
3121 if (current_metadata == nullptr) {
3122 LOG(ERROR) << "Cannot create metadata builder.";
3123 return Return::Error();
3124 }
3125
3126 auto target_metadata =
3127 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
3128 if (target_metadata == nullptr) {
3129 LOG(ERROR) << "Cannot create target metadata builder.";
3130 return Return::Error();
3131 }
3132
3133 // Delete partitions with target suffix in |current_metadata|. Otherwise,
3134 // partition_cow_creator recognizes these left-over partitions as used space.
3135 for (const auto& group_name : current_metadata->ListGroups()) {
3136 if (android::base::EndsWith(group_name, target_suffix)) {
3137 current_metadata->RemoveGroupAndPartitions(group_name);
3138 }
3139 }
3140
3141 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
3142 if (!metadata_updater.Update()) {
3143 LOG(ERROR) << "Cannot calculate new metadata.";
3144 return Return::Error();
3145 }
3146
3147 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
3148 // free regions.
3149 UnmapAndDeleteCowPartition(current_metadata.get());
3150
3151 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
3152 // devices with retrofit dynamic partitions does not make sense.
3153 // This ensures that current_metadata->GetFreeRegions() uses the same device
3154 // indices as target_metadata (i.e. 0 -> "super").
3155 // This is also assumed in MapCowDevices() call below.
3156 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
3157 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
3158
3159 std::map<std::string, SnapshotStatus> all_snapshot_status;
3160
3161 // In case of error, automatically delete devices that are created along the way.
3162 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
3163 // these devices.
3164 AutoDeviceList created_devices;
3165
3166 const auto& dap_metadata = manifest.dynamic_partition_metadata();
3167 CowOptions options;
3168 CowWriter writer(options);
3169 bool cow_format_support = true;
3170 if (dap_metadata.cow_version() < writer.GetCowVersion()) {
3171 cow_format_support = false;
3172 }
3173
3174 LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
3175 << " writer.GetCowVersion(): " << writer.GetCowVersion();
3176
3177 bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
3178 !device_->IsRecovery() && cow_format_support;
3179
3180 std::string compression_algorithm;
3181 if (use_compression) {
3182 compression_algorithm = dap_metadata.vabc_compression_param();
3183 if (compression_algorithm.empty()) {
3184 // Older OTAs don't set an explicit compression type, so default to gz.
3185 compression_algorithm = "gz";
3186 }
3187 } else {
3188 compression_algorithm = "none";
3189 }
3190
3191 PartitionCowCreator cow_creator{
3192 .target_metadata = target_metadata.get(),
3193 .target_suffix = target_suffix,
3194 .target_partition = nullptr,
3195 .current_metadata = current_metadata.get(),
3196 .current_suffix = current_suffix,
3197 .update = nullptr,
3198 .extra_extents = {},
3199 .compression_enabled = use_compression,
3200 .compression_algorithm = compression_algorithm,
3201 };
3202
3203 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
3204 &all_snapshot_status);
3205 if (!ret.is_ok()) return ret;
3206
3207 auto exported_target_metadata = target_metadata->Export();
3208 if (exported_target_metadata == nullptr) {
3209 LOG(ERROR) << "Cannot export target metadata";
3210 return Return::Error();
3211 }
3212
3213 ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
3214 exported_target_metadata.get(), target_suffix,
3215 all_snapshot_status);
3216 if (!ret.is_ok()) return ret;
3217
3218 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
3219 *exported_target_metadata, target_slot)) {
3220 LOG(ERROR) << "Cannot write target metadata";
3221 return Return::Error();
3222 }
3223
3224 // If compression is enabled, we need to retain a copy of the old metadata
3225 // so we can access original blocks in case they are moved around. We do
3226 // not want to rely on the old super metadata slot because we don't
3227 // guarantee its validity after the slot switch is successful.
3228 if (cow_creator.compression_enabled) {
3229 auto metadata = current_metadata->Export();
3230 if (!metadata) {
3231 LOG(ERROR) << "Could not export current metadata";
3232 return Return::Error();
3233 }
3234
3235 auto path = GetOldPartitionMetadataPath();
3236 if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
3237 LOG(ERROR) << "Cannot write old metadata to " << path;
3238 return Return::Error();
3239 }
3240 }
3241
3242 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3243 status.set_state(update_state);
3244 status.set_compression_enabled(cow_creator.compression_enabled);
3245 if (cow_creator.compression_enabled) {
3246 if (!device()->IsTestDevice()) {
3247 bool userSnapshotsEnabled = IsUserspaceSnapshotsEnabled();
3248 const std::string UNKNOWN = "unknown";
3249 const std::string vendor_release = android::base::GetProperty(
3250 "ro.vendor.build.version.release_or_codename", UNKNOWN);
3251
3252 // No user-space snapshots if vendor partition is on Android 12
3253 if (vendor_release.find("12") != std::string::npos) {
3254 LOG(INFO) << "Userspace snapshots disabled as vendor partition is on Android: "
3255 << vendor_release;
3256 userSnapshotsEnabled = false;
3257 }
3258
3259 // Userspace snapshots is enabled only if compression is enabled
3260 status.set_userspace_snapshots(userSnapshotsEnabled);
3261 if (userSnapshotsEnabled) {
3262 is_snapshot_userspace_ = true;
3263 status.set_io_uring_enabled(IsIouringEnabled());
3264 LOG(INFO) << "Userspace snapshots enabled";
3265 } else {
3266 is_snapshot_userspace_ = false;
3267 LOG(INFO) << "Userspace snapshots disabled";
3268 }
3269
3270 // Terminate stale daemon if any
3271 std::unique_ptr<SnapuserdClient> snapuserd_client =
3272 SnapuserdClient::Connect(kSnapuserdSocket, 5s);
3273 if (snapuserd_client) {
3274 snapuserd_client->DetachSnapuserd();
3275 snapuserd_client->CloseConnection();
3276 snapuserd_client = nullptr;
3277 }
3278
3279 // Clear the cached client if any
3280 if (snapuserd_client_) {
3281 snapuserd_client_->CloseConnection();
3282 snapuserd_client_ = nullptr;
3283 }
3284 } else {
3285 bool userSnapshotsEnabled = true;
3286 const std::string UNKNOWN = "unknown";
3287 const std::string vendor_release = android::base::GetProperty(
3288 "ro.vendor.build.version.release_or_codename", UNKNOWN);
3289
3290 // No user-space snapshots if vendor partition is on Android 12
3291 if (vendor_release.find("12") != std::string::npos) {
3292 LOG(INFO) << "Userspace snapshots disabled as vendor partition is on Android: "
3293 << vendor_release;
3294 userSnapshotsEnabled = false;
3295 }
3296
3297 userSnapshotsEnabled = (userSnapshotsEnabled && !IsDmSnapshotTestingEnabled());
3298 status.set_userspace_snapshots(userSnapshotsEnabled);
3299 if (!userSnapshotsEnabled) {
3300 is_snapshot_userspace_ = false;
3301 LOG(INFO) << "User-space snapshots disabled for testing";
3302 } else {
3303 is_snapshot_userspace_ = true;
3304 LOG(INFO) << "User-space snapshots enabled for testing";
3305 }
3306 }
3307 }
3308 if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
3309 LOG(ERROR) << "Unable to write new update state";
3310 return Return::Error();
3311 }
3312
3313 created_devices.Release();
3314 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
3315
3316 return Return::Ok();
3317 }
3318
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)3319 Return SnapshotManager::CreateUpdateSnapshotsInternal(
3320 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
3321 AutoDeviceList* created_devices,
3322 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
3323 CHECK(lock);
3324
3325 auto* target_metadata = cow_creator->target_metadata;
3326 const auto& target_suffix = cow_creator->target_suffix;
3327
3328 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
3329 LOG(ERROR) << "Cannot add group " << kCowGroupName;
3330 return Return::Error();
3331 }
3332
3333 std::map<std::string, const PartitionUpdate*> partition_map;
3334 std::map<std::string, std::vector<Extent>> extra_extents_map;
3335 for (const auto& partition_update : manifest.partitions()) {
3336 auto suffixed_name = partition_update.partition_name() + target_suffix;
3337 auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
3338 if (!inserted) {
3339 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
3340 << " in update manifest.";
3341 return Return::Error();
3342 }
3343
3344 auto& extra_extents = extra_extents_map[suffixed_name];
3345 if (partition_update.has_hash_tree_extent()) {
3346 extra_extents.push_back(partition_update.hash_tree_extent());
3347 }
3348 if (partition_update.has_fec_extent()) {
3349 extra_extents.push_back(partition_update.fec_extent());
3350 }
3351 }
3352
3353 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3354 cow_creator->target_partition = target_partition;
3355 cow_creator->update = nullptr;
3356 auto iter = partition_map.find(target_partition->name());
3357 if (iter != partition_map.end()) {
3358 cow_creator->update = iter->second;
3359 } else {
3360 LOG(INFO) << target_partition->name()
3361 << " isn't included in the payload, skipping the cow creation.";
3362 continue;
3363 }
3364
3365 cow_creator->extra_extents.clear();
3366 auto extra_extents_it = extra_extents_map.find(target_partition->name());
3367 if (extra_extents_it != extra_extents_map.end()) {
3368 cow_creator->extra_extents = std::move(extra_extents_it->second);
3369 }
3370
3371 // Compute the device sizes for the partition.
3372 auto cow_creator_ret = cow_creator->Run();
3373 if (!cow_creator_ret.has_value()) {
3374 LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
3375 return Return::Error();
3376 }
3377
3378 LOG(INFO) << "For partition " << target_partition->name()
3379 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
3380 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
3381 << ", cow partition size = "
3382 << cow_creator_ret->snapshot_status.cow_partition_size()
3383 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
3384
3385 // Delete any existing snapshot before re-creating one.
3386 if (!DeleteSnapshot(lock, target_partition->name())) {
3387 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
3388 << target_partition->name();
3389 return Return::Error();
3390 }
3391
3392 // It is possible that the whole partition uses free space in super, and snapshot / COW
3393 // would not be needed. In this case, skip the partition.
3394 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
3395 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
3396 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
3397 CHECK(needs_snapshot == needs_cow);
3398
3399 if (!needs_snapshot) {
3400 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
3401 << "because nothing needs to be snapshotted.";
3402 continue;
3403 }
3404
3405 // Find the original partition size.
3406 auto name = target_partition->name();
3407 auto old_partition_name =
3408 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
3409 auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
3410 if (old_partition) {
3411 cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
3412 }
3413
3414 // Store these device sizes to snapshot status file.
3415 if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
3416 return Return::Error();
3417 }
3418 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
3419
3420 // Create the COW partition. That is, use any remaining free space in super partition before
3421 // creating the COW images.
3422 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
3423 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
3424 << "cow_partition_size == "
3425 << cow_creator_ret->snapshot_status.cow_partition_size()
3426 << " is not a multiple of sector size " << kSectorSize;
3427 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
3428 kCowGroupName, 0 /* flags */);
3429 if (cow_partition == nullptr) {
3430 return Return::Error();
3431 }
3432
3433 if (!target_metadata->ResizePartition(
3434 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
3435 cow_creator_ret->cow_partition_usable_regions)) {
3436 LOG(ERROR) << "Cannot create COW partition on metadata with size "
3437 << cow_creator_ret->snapshot_status.cow_partition_size();
3438 return Return::Error();
3439 }
3440 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
3441 // error in the future.
3442 }
3443
3444 all_snapshot_status->emplace(target_partition->name(),
3445 std::move(cow_creator_ret->snapshot_status));
3446
3447 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
3448 }
3449
3450 LOG(INFO) << "Allocating CoW images.";
3451
3452 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
3453 // Create the backing COW image if necessary.
3454 if (snapshot_status.cow_file_size() > 0) {
3455 auto ret = CreateCowImage(lock, name);
3456 if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3457 }
3458
3459 LOG(INFO) << "Successfully created snapshot for " << name;
3460 }
3461
3462 return Return::Ok();
3463 }
3464
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3465 Return SnapshotManager::InitializeUpdateSnapshots(
3466 LockedFile* lock, MetadataBuilder* target_metadata,
3467 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3468 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3469 CHECK(lock);
3470
3471 CreateLogicalPartitionParams cow_params{
3472 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3473 .metadata = exported_target_metadata,
3474 .timeout_ms = std::chrono::milliseconds::max(),
3475 .partition_opener = &device_->GetPartitionOpener(),
3476 };
3477 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3478 AutoDeviceList created_devices_for_cow;
3479
3480 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3481 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3482 << target_partition->name();
3483 return Return::Error();
3484 }
3485
3486 auto it = all_snapshot_status.find(target_partition->name());
3487 if (it == all_snapshot_status.end()) continue;
3488 cow_params.partition_name = target_partition->name();
3489 std::string cow_name;
3490 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3491 return Return::Error();
3492 }
3493
3494 std::string cow_path;
3495 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3496 LOG(ERROR) << "Cannot determine path for " << cow_name;
3497 return Return::Error();
3498 }
3499
3500 if (it->second.compression_enabled()) {
3501 unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3502 if (fd < 0) {
3503 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3504 << cow_params.partition_name;
3505 return Return::Error();
3506 }
3507
3508 CowOptions options;
3509 if (device()->IsTestDevice()) {
3510 options.scratch_space = false;
3511 }
3512 options.compression = it->second.compression_algorithm();
3513
3514 CowWriter writer(options);
3515 if (!writer.Initialize(fd) || !writer.Finalize()) {
3516 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3517 return Return::Error();
3518 }
3519 } else {
3520 auto ret = InitializeKernelCow(cow_path);
3521 if (!ret.is_ok()) {
3522 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3523 << cow_path;
3524 return AddRequiredSpace(ret, all_snapshot_status);
3525 }
3526 }
3527 // Let destructor of created_devices_for_cow to unmap the COW devices.
3528 };
3529 return Return::Ok();
3530 }
3531
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3532 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3533 std::string* snapshot_path) {
3534 auto lock = LockShared();
3535 if (!lock) return false;
3536 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3537 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3538 << params.GetPartitionName();
3539 return false;
3540 }
3541
3542 SnapshotStatus status;
3543 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3544 return false;
3545 }
3546 if (status.compression_enabled()) {
3547 LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3548 return false;
3549 }
3550
3551 SnapshotPaths paths;
3552 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3553 return false;
3554 }
3555
3556 if (!paths.snapshot_device.empty()) {
3557 *snapshot_path = paths.snapshot_device;
3558 } else {
3559 *snapshot_path = paths.target_device;
3560 }
3561 DCHECK(!snapshot_path->empty());
3562 return true;
3563 }
3564
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3565 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3566 const android::fs_mgr::CreateLogicalPartitionParams& params,
3567 const std::optional<std::string>& source_device) {
3568 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3569 (void)params;
3570 (void)source_device;
3571
3572 LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3573 return nullptr;
3574 #else
3575 // First unmap any existing mapping.
3576 auto lock = LockShared();
3577 if (!lock) return nullptr;
3578 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3579 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3580 << params.GetPartitionName();
3581 return nullptr;
3582 }
3583
3584 SnapshotPaths paths;
3585 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3586 return nullptr;
3587 }
3588
3589 SnapshotStatus status;
3590 if (!paths.cow_device_name.empty()) {
3591 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3592 return nullptr;
3593 }
3594 } else {
3595 // Currently, partition_cow_creator always creates snapshots. The
3596 // reason is that if partition X shrinks while partition Y grows, we
3597 // cannot bindly write to the newly freed extents in X. This would
3598 // make the old slot unusable. So, the entire size of the target
3599 // partition is currently considered snapshottable.
3600 LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3601 return nullptr;
3602 }
3603
3604 if (status.compression_enabled()) {
3605 return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3606 status, paths);
3607 }
3608 return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3609 paths);
3610 #endif
3611 }
3612
3613 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3614 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3615 LockedFile* lock, const std::optional<std::string>& source_device,
3616 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3617 const SnapshotPaths& paths) {
3618 CHECK(lock);
3619
3620 CowOptions cow_options;
3621 cow_options.compression = status.compression_algorithm();
3622 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3623 // Disable scratch space for vts tests
3624 if (device()->IsTestDevice()) {
3625 cow_options.scratch_space = false;
3626 }
3627
3628 // Currently we don't support partial snapshots, since partition_cow_creator
3629 // never creates this scenario.
3630 CHECK(status.snapshot_size() == status.device_size());
3631
3632 auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3633 if (source_device) {
3634 writer->SetSourceDevice(*source_device);
3635 }
3636
3637 std::string cow_path;
3638 if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3639 LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3640 return nullptr;
3641 }
3642
3643 unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3644 if (cow_fd < 0) {
3645 PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3646 return nullptr;
3647 }
3648 if (!writer->SetCowDevice(std::move(cow_fd))) {
3649 LOG(ERROR) << "Could not create COW writer from " << cow_path;
3650 return nullptr;
3651 }
3652
3653 return writer;
3654 }
3655
OpenKernelSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3656 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3657 LockedFile* lock, const std::optional<std::string>& source_device,
3658 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3659 const SnapshotPaths& paths) {
3660 CHECK(lock);
3661
3662 CowOptions cow_options;
3663 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3664
3665 auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3666
3667 std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3668 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3669 if (fd < 0) {
3670 PLOG(ERROR) << "open failed: " << path;
3671 return nullptr;
3672 }
3673
3674 if (source_device) {
3675 writer->SetSourceDevice(*source_device);
3676 }
3677
3678 uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3679 writer->SetSnapshotDevice(std::move(fd), cow_size);
3680
3681 return writer;
3682 }
3683 #endif // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3684
UnmapUpdateSnapshot(const std::string & target_partition_name)3685 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3686 auto lock = LockShared();
3687 if (!lock) return false;
3688 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3689 }
3690
UnmapAllPartitionsInRecovery()3691 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3692 auto lock = LockExclusive();
3693 if (!lock) return false;
3694
3695 const auto& opener = device_->GetPartitionOpener();
3696 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3697 auto super_device = device_->GetSuperDevice(slot);
3698 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3699 if (!metadata) {
3700 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3701 return false;
3702 }
3703
3704 bool ok = true;
3705 for (const auto& partition : metadata->partitions) {
3706 auto partition_name = GetPartitionName(partition);
3707 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3708 }
3709 return ok;
3710 }
3711
operator <<(std::ostream & os,SnapshotManager::Slot slot)3712 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3713 switch (slot) {
3714 case SnapshotManager::Slot::Unknown:
3715 return os << "unknown";
3716 case SnapshotManager::Slot::Source:
3717 return os << "source";
3718 case SnapshotManager::Slot::Target:
3719 return os << "target";
3720 }
3721 }
3722
Dump(std::ostream & os)3723 bool SnapshotManager::Dump(std::ostream& os) {
3724 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3725 // if it is racy.
3726 auto file = OpenLock(0 /* lock flag */);
3727 if (!file) return false;
3728
3729 std::stringstream ss;
3730
3731 auto update_status = ReadSnapshotUpdateStatus(file.get());
3732
3733 ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3734 ss << "Compression: " << update_status.compression_enabled() << std::endl;
3735 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3736 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3737 ss << "Rollback indicator: "
3738 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3739 << std::endl;
3740 ss << "Forward merge indicator: "
3741 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3742 << std::endl;
3743 ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3744
3745 bool ok = true;
3746 std::vector<std::string> snapshots;
3747 if (!ListSnapshots(file.get(), &snapshots)) {
3748 LOG(ERROR) << "Could not list snapshots";
3749 snapshots.clear();
3750 ok = false;
3751 }
3752 for (const auto& name : snapshots) {
3753 ss << "Snapshot: " << name << std::endl;
3754 SnapshotStatus status;
3755 if (!ReadSnapshotStatus(file.get(), name, &status)) {
3756 ok = false;
3757 continue;
3758 }
3759 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
3760 ss << " device size (bytes): " << status.device_size() << std::endl;
3761 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
3762 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3763 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
3764 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
3765 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
3766 ss << " compression: " << status.compression_algorithm() << std::endl;
3767 }
3768 os << ss.rdbuf();
3769 return ok;
3770 }
3771
EnsureMetadataMounted()3772 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3773 if (!device_->IsRecovery()) {
3774 // No need to mount anything in recovery.
3775 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3776 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3777 }
3778 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3779 if (ret == nullptr) return nullptr;
3780
3781 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3782 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3783 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3784 // treat this case as if /metadata is not mounted.
3785 if (!LockShared()) {
3786 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3787 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3788 return nullptr;
3789 }
3790 return ret;
3791 }
3792
HandleImminentDataWipe(const std::function<void ()> & callback)3793 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3794 if (!device_->IsRecovery()) {
3795 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3796 return false;
3797 }
3798
3799 auto mount = EnsureMetadataMounted();
3800 if (!mount || !mount->HasDevice()) {
3801 // We allow the wipe to continue, because if we can't mount /metadata,
3802 // it is unlikely the device would have booted anyway. If there is no
3803 // metadata partition, then the device predates Virtual A/B.
3804 return true;
3805 }
3806
3807 // Check this early, so we don't accidentally start trying to populate
3808 // the state file in recovery. Note we don't call GetUpdateState since
3809 // we want errors in acquiring the lock to be propagated, instead of
3810 // returning UpdateState::None.
3811 auto state_file = GetStateFilePath();
3812 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3813 return true;
3814 }
3815
3816 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3817 auto super_path = device_->GetSuperDevice(slot_number);
3818 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3819 LOG(ERROR) << "Unable to map partitions to complete merge.";
3820 return false;
3821 }
3822
3823 auto process_callback = [&]() -> bool {
3824 if (callback) {
3825 callback();
3826 }
3827 return true;
3828 };
3829
3830 in_factory_data_reset_ = true;
3831 UpdateState state =
3832 ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3833 in_factory_data_reset_ = false;
3834
3835 if (state == UpdateState::MergeFailed) {
3836 return false;
3837 }
3838
3839 // Nothing should be depending on partitions now, so unmap them all.
3840 if (!UnmapAllPartitionsInRecovery()) {
3841 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3842 }
3843
3844 if (state != UpdateState::None) {
3845 auto lock = LockExclusive();
3846 if (!lock) return false;
3847
3848 // Zap the update state so the bootloader doesn't think we're still
3849 // merging. It's okay if this fails, it's informative only at this
3850 // point.
3851 WriteUpdateState(lock.get(), UpdateState::None);
3852 }
3853 return true;
3854 }
3855
FinishMergeInRecovery()3856 bool SnapshotManager::FinishMergeInRecovery() {
3857 if (!device_->IsRecovery()) {
3858 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3859 return false;
3860 }
3861
3862 auto mount = EnsureMetadataMounted();
3863 if (!mount || !mount->HasDevice()) {
3864 return false;
3865 }
3866
3867 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3868 auto super_path = device_->GetSuperDevice(slot_number);
3869 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3870 LOG(ERROR) << "Unable to map partitions to complete merge.";
3871 return false;
3872 }
3873
3874 UpdateState state = ProcessUpdateState();
3875 if (state != UpdateState::MergeCompleted) {
3876 LOG(ERROR) << "Merge returned unexpected status: " << state;
3877 return false;
3878 }
3879
3880 // Nothing should be depending on partitions now, so unmap them all.
3881 if (!UnmapAllPartitionsInRecovery()) {
3882 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3883 }
3884 return true;
3885 }
3886
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3887 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3888 const std::function<bool()>& callback) {
3889 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3890 UpdateState state = ProcessUpdateState(callback);
3891 LOG(INFO) << "Update state in recovery: " << state;
3892 switch (state) {
3893 case UpdateState::MergeFailed:
3894 LOG(ERROR) << "Unrecoverable merge failure detected.";
3895 return state;
3896 case UpdateState::Unverified: {
3897 // If an OTA was just applied but has not yet started merging:
3898 //
3899 // - if forward merge is allowed, initiate merge and call
3900 // ProcessUpdateState again.
3901 //
3902 // - if forward merge is not allowed, we
3903 // have no choice but to revert slots, because the current slot will
3904 // immediately become unbootable. Rather than wait for the device
3905 // to reboot N times until a rollback, we proactively disable the
3906 // new slot instead.
3907 //
3908 // Since the rollback is inevitable, we don't treat a HAL failure
3909 // as an error here.
3910 auto slot = GetCurrentSlot();
3911 if (slot == Slot::Target) {
3912 if (allow_forward_merge &&
3913 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3914 LOG(INFO) << "Forward merge allowed, initiating merge now.";
3915
3916 if (!InitiateMerge()) {
3917 LOG(ERROR) << "Failed to initiate merge on data wipe.";
3918 return UpdateState::MergeFailed;
3919 }
3920 return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3921 }
3922
3923 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3924 device_->SetSlotAsUnbootable(slot_number);
3925 } else {
3926 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3927 }
3928 break;
3929 }
3930 case UpdateState::MergeNeedsReboot:
3931 // We shouldn't get here, because nothing is depending on
3932 // logical partitions.
3933 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3934 break;
3935 default:
3936 break;
3937 }
3938 return state;
3939 }
3940
EnsureNoOverflowSnapshot(LockedFile * lock)3941 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3942 CHECK(lock);
3943
3944 std::vector<std::string> snapshots;
3945 if (!ListSnapshots(lock, &snapshots)) {
3946 LOG(ERROR) << "Could not list snapshots.";
3947 return false;
3948 }
3949
3950 for (const auto& snapshot : snapshots) {
3951 SnapshotStatus status;
3952 if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3953 return false;
3954 }
3955 if (status.compression_enabled()) {
3956 continue;
3957 }
3958
3959 std::vector<DeviceMapper::TargetInfo> targets;
3960 if (!dm_.GetTableStatus(snapshot, &targets)) {
3961 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3962 return false;
3963 }
3964 if (targets.size() != 1) {
3965 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3966 << ", size = " << targets.size();
3967 return false;
3968 }
3969 if (targets[0].IsOverflowSnapshot()) {
3970 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3971 << ", CoW device size computation is wrong!";
3972 return false;
3973 }
3974 }
3975
3976 return true;
3977 }
3978
RecoveryCreateSnapshotDevices()3979 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3980 if (!device_->IsRecovery()) {
3981 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3982 return CreateResult::NOT_CREATED;
3983 }
3984
3985 auto mount = EnsureMetadataMounted();
3986 if (!mount || !mount->HasDevice()) {
3987 LOG(ERROR) << "Couldn't mount Metadata.";
3988 return CreateResult::NOT_CREATED;
3989 }
3990 return RecoveryCreateSnapshotDevices(mount);
3991 }
3992
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3993 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3994 const std::unique_ptr<AutoDevice>& metadata_device) {
3995 if (!device_->IsRecovery()) {
3996 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3997 return CreateResult::NOT_CREATED;
3998 }
3999
4000 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
4001 LOG(ERROR) << "Metadata not mounted.";
4002 return CreateResult::NOT_CREATED;
4003 }
4004
4005 auto state_file = GetStateFilePath();
4006 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
4007 LOG(ERROR) << "Couldn't access state file.";
4008 return CreateResult::NOT_CREATED;
4009 }
4010
4011 if (!NeedSnapshotsInFirstStageMount()) {
4012 return CreateResult::NOT_CREATED;
4013 }
4014
4015 auto slot_suffix = device_->GetOtherSlotSuffix();
4016 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
4017 auto super_path = device_->GetSuperDevice(slot_number);
4018 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
4019 LOG(ERROR) << "Unable to map partitions.";
4020 return CreateResult::ERROR;
4021 }
4022 return CreateResult::CREATED;
4023 }
4024
UpdateForwardMergeIndicator(bool wipe)4025 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
4026 auto path = GetForwardMergeIndicatorPath();
4027
4028 if (!wipe) {
4029 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
4030 return RemoveFileIfExists(path);
4031 }
4032
4033 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
4034
4035 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
4036 if (!android::base::WriteStringToFile("1", path)) {
4037 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
4038 return false;
4039 }
4040
4041 return true;
4042 }
4043
GetSnapshotMergeStatsInstance()4044 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
4045 return SnapshotMergeStats::GetInstance(*this);
4046 }
4047
4048 // This is only to be used in recovery or normal Android (not first-stage init).
4049 // We don't guarantee dm paths are available in first-stage init, because ueventd
4050 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)4051 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
4052 std::string* device_path) {
4053 // Try getting the device string if it is a device mapper device.
4054 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4055 return dm_.GetDmDevicePathByName(device_name, device_path);
4056 }
4057
4058 // Otherwise, get path from IImageManager.
4059 return images_->GetMappedImageDevice(device_name, device_path);
4060 }
4061
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)4062 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
4063 std::string* device_string_or_mapped_path) {
4064 // Try getting the device string if it is a device mapper device.
4065 if (dm_.GetState(device_name) != DmDeviceState::INVALID) {
4066 return dm_.GetDeviceString(device_name, device_string_or_mapped_path);
4067 }
4068
4069 // Otherwise, get path from IImageManager.
4070 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
4071 return false;
4072 }
4073
4074 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
4075 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
4076 << "may not be available in first stage init! ";
4077 return true;
4078 }
4079
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)4080 bool SnapshotManager::WaitForDevice(const std::string& device,
4081 std::chrono::milliseconds timeout_ms) {
4082 if (!android::base::StartsWith(device, "/")) {
4083 return true;
4084 }
4085
4086 // In first-stage init, we rely on init setting a callback which can
4087 // regenerate uevents and populate /dev for us.
4088 if (uevent_regen_callback_) {
4089 if (!uevent_regen_callback_(device)) {
4090 LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
4091 return false;
4092 }
4093 return true;
4094 }
4095
4096 // Otherwise, the only kind of device we need to wait for is a dm-user
4097 // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
4098 // the path has been created.
4099 if (!android::base::StartsWith(device, "/dev/dm-user/")) {
4100 return true;
4101 }
4102
4103 if (timeout_ms.count() == 0) {
4104 LOG(ERROR) << "No timeout was specified to wait for device: " << device;
4105 return false;
4106 }
4107 if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
4108 LOG(ERROR) << "Timed out waiting for device to appear: " << device;
4109 return false;
4110 }
4111 return true;
4112 }
4113
IsSnapuserdRequired()4114 bool SnapshotManager::IsSnapuserdRequired() {
4115 auto lock = LockExclusive();
4116 if (!lock) return false;
4117
4118 auto status = ReadSnapshotUpdateStatus(lock.get());
4119 return status.state() != UpdateState::None && status.compression_enabled();
4120 }
4121
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)4122 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
4123 return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
4124 }
4125
PerformSecondStageInitTransition()4126 bool SnapshotManager::PerformSecondStageInitTransition() {
4127 return PerformInitTransition(InitTransition::SECOND_STAGE);
4128 }
4129
ReadOldPartitionMetadata(LockedFile * lock)4130 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
4131 CHECK(lock);
4132
4133 if (!old_partition_metadata_) {
4134 auto path = GetOldPartitionMetadataPath();
4135 old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
4136 if (!old_partition_metadata_) {
4137 LOG(ERROR) << "Could not read old partition metadata from " << path;
4138 return nullptr;
4139 }
4140 }
4141 return old_partition_metadata_.get();
4142 }
4143
DecideMergePhase(const SnapshotStatus & status)4144 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
4145 if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
4146 return MergePhase::FIRST_PHASE;
4147 }
4148 return MergePhase::SECOND_PHASE;
4149 }
4150
UpdateCowStats(ISnapshotMergeStats * stats)4151 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
4152 auto lock = LockExclusive();
4153 if (!lock) return;
4154
4155 std::vector<std::string> snapshots;
4156 if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
4157 LOG(ERROR) << "Could not list snapshots";
4158 return;
4159 }
4160
4161 uint64_t cow_file_size = 0;
4162 uint64_t total_cow_size = 0;
4163 uint64_t estimated_cow_size = 0;
4164 for (const auto& snapshot : snapshots) {
4165 SnapshotStatus status;
4166 if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
4167 return;
4168 }
4169
4170 cow_file_size += status.cow_file_size();
4171 total_cow_size += status.cow_file_size() + status.cow_partition_size();
4172 estimated_cow_size += status.estimated_cow_size();
4173 }
4174
4175 stats->set_cow_file_size(cow_file_size);
4176 stats->set_total_cow_size_bytes(total_cow_size);
4177 stats->set_estimated_cow_size_bytes(estimated_cow_size);
4178 }
4179
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)4180 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
4181 const std::chrono::milliseconds& timeout_ms) {
4182 auto start = std::chrono::steady_clock::now();
4183 while (true) {
4184 if (dm_.DeleteDeviceIfExists(name)) {
4185 return true;
4186 }
4187 auto now = std::chrono::steady_clock::now();
4188 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
4189 if (elapsed >= timeout_ms) {
4190 break;
4191 }
4192 std::this_thread::sleep_for(400ms);
4193 }
4194
4195 // Try to diagnose why this failed. First get the actual device path.
4196 std::string full_path;
4197 if (!dm_.GetDmDevicePathByName(name, &full_path)) {
4198 LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
4199 return false;
4200 }
4201
4202 // Check for child dm-devices.
4203 std::string block_name = android::base::Basename(full_path);
4204 std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
4205
4206 std::error_code ec;
4207 std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
4208 if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
4209 LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
4210 return false;
4211 }
4212
4213 // Check for mounted partitions.
4214 android::fs_mgr::Fstab fstab;
4215 android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
4216 for (const auto& entry : fstab) {
4217 if (android::base::Basename(entry.blk_device) == block_name) {
4218 LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
4219 return false;
4220 }
4221 }
4222
4223 // Check for detached mounted partitions.
4224 for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
4225 std::string fs_type = android::base::Basename(fs.path().c_str());
4226 if (!(fs_type == "ext4" || fs_type == "f2fs")) {
4227 continue;
4228 }
4229
4230 std::string path = fs.path().c_str() + "/"s + block_name;
4231 if (access(path.c_str(), F_OK) == 0) {
4232 LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
4233 << "; possibly open file descriptor or attached loop device.";
4234 return false;
4235 }
4236 }
4237
4238 LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
4239 << " still in use."
4240 << " Probably a file descriptor was leaked or held open, or a loop device is"
4241 << " attached.";
4242 return false;
4243 }
4244
ReadMergeFailureCode()4245 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
4246 auto lock = LockExclusive();
4247 if (!lock) return MergeFailureCode::AcquireLock;
4248
4249 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4250 if (status.state() != UpdateState::MergeFailed) {
4251 return MergeFailureCode::Ok;
4252 }
4253 return status.merge_failure_code();
4254 }
4255
ReadSourceBuildFingerprint()4256 std::string SnapshotManager::ReadSourceBuildFingerprint() {
4257 auto lock = LockExclusive();
4258 if (!lock) return {};
4259
4260 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
4261 return status.source_build_fingerprint();
4262 }
4263
4264 } // namespace snapshot
4265 } // namespace android
4266