1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <fcntl.h>
19 #include <math.h>
20 #include <sys/file.h>
21 #include <sys/types.h>
22 #include <sys/unistd.h>
23
24 #include <filesystem>
25 #include <optional>
26 #include <thread>
27 #include <unordered_set>
28
29 #include <android-base/file.h>
30 #include <android-base/logging.h>
31 #include <android-base/parseint.h>
32 #include <android-base/properties.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 #include <cutils/sockets.h>
36 #include <ext4_utils/ext4_utils.h>
37 #include <fs_mgr.h>
38 #include <fs_mgr/file_wait.h>
39 #include <fs_mgr_dm_linear.h>
40 #include <fstab/fstab.h>
41 #include <libdm/dm.h>
42 #include <libfiemap/image_manager.h>
43 #include <liblp/liblp.h>
44
45 #include <android/snapshot/snapshot.pb.h>
46 #include <libsnapshot/snapshot_stats.h>
47 #include "device_info.h"
48 #include "partition_cow_creator.h"
49 #include "snapshot_metadata_updater.h"
50 #include "snapshot_reader.h"
51 #include "utility.h"
52
53 namespace android {
54 namespace snapshot {
55
56 using android::base::unique_fd;
57 using android::dm::DeviceMapper;
58 using android::dm::DmDeviceState;
59 using android::dm::DmTable;
60 using android::dm::DmTargetLinear;
61 using android::dm::DmTargetSnapshot;
62 using android::dm::DmTargetUser;
63 using android::dm::kSectorSize;
64 using android::dm::SnapshotStorageMode;
65 using android::fiemap::FiemapStatus;
66 using android::fiemap::IImageManager;
67 using android::fs_mgr::CreateDmTable;
68 using android::fs_mgr::CreateLogicalPartition;
69 using android::fs_mgr::CreateLogicalPartitionParams;
70 using android::fs_mgr::GetPartitionGroupName;
71 using android::fs_mgr::GetPartitionName;
72 using android::fs_mgr::LpMetadata;
73 using android::fs_mgr::MetadataBuilder;
74 using android::fs_mgr::SlotNumberForSlotSuffix;
75 using android::hardware::boot::V1_1::MergeStatus;
76 using chromeos_update_engine::DeltaArchiveManifest;
77 using chromeos_update_engine::Extent;
78 using chromeos_update_engine::FileDescriptor;
79 using chromeos_update_engine::PartitionUpdate;
80 template <typename T>
81 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
82 using std::chrono::duration_cast;
83 using namespace std::chrono_literals;
84 using namespace std::string_literals;
85
86 static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
87 static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
88 static constexpr auto kUpdateStateCheckInterval = 2s;
89
90 // Note: IImageManager is an incomplete type in the header, so the default
91 // destructor doesn't work.
~SnapshotManager()92 SnapshotManager::~SnapshotManager() {}
93
New(IDeviceInfo * info)94 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
95 if (!info) {
96 info = new DeviceInfo();
97 }
98 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
99 }
100
NewForFirstStageMount(IDeviceInfo * info)101 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
102 if (!info) {
103 DeviceInfo* impl = new DeviceInfo();
104 impl->set_first_stage_init(true);
105 info = impl;
106 }
107 auto sm = New(info);
108
109 // The first-stage version of snapuserd is explicitly started by init. Do
110 // not attempt to using it during tests (which run in normal AOSP).
111 if (!sm->device()->IsTestDevice()) {
112 sm->use_first_stage_snapuserd_ = true;
113 }
114 return sm;
115 }
116
SnapshotManager(IDeviceInfo * device)117 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
118 metadata_dir_ = device_->GetMetadataDir();
119 }
120
// Returns |snapshot_name| with the "-cow" suffix appended.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
124
// Returns |snapshot_name| with the "-user-cow" suffix appended.
static std::string GetDmUserCowName(const std::string& snapshot_name) {
    std::string dm_user_name(snapshot_name);
    dm_user_name.append("-user-cow");
    return dm_user_name;
}
128
// Returns |snapshot_name| with the "-cow-img" suffix appended.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
132
// Returns |partition_name| with the "-base" suffix appended.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
136
// Returns |partition_name| with the "-src" suffix appended.
static std::string GetSourceDeviceName(const std::string& partition_name) {
    std::string source_name(partition_name);
    source_name.append("-src");
    return source_name;
}
140
BeginUpdate()141 bool SnapshotManager::BeginUpdate() {
142 bool needs_merge = false;
143 if (!TryCancelUpdate(&needs_merge)) {
144 return false;
145 }
146 if (needs_merge) {
147 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
148 auto state = ProcessUpdateState();
149 LOG(INFO) << "Merged with state = " << state;
150 }
151
152 auto file = LockExclusive();
153 if (!file) return false;
154
155 // Purge the ImageManager just in case there is a corrupt lp_metadata file
156 // lying around. (NB: no need to return false on an error, we can let the
157 // update try to progress.)
158 if (EnsureImageManager()) {
159 images_->RemoveAllImages();
160 }
161
162 // Clear any cached metadata (this allows re-using one manager across tests).
163 old_partition_metadata_ = nullptr;
164
165 auto state = ReadUpdateState(file.get());
166 if (state != UpdateState::None) {
167 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
168 return false;
169 }
170 return WriteUpdateState(file.get(), UpdateState::Initiated);
171 }
172
CancelUpdate()173 bool SnapshotManager::CancelUpdate() {
174 bool needs_merge = false;
175 if (!TryCancelUpdate(&needs_merge)) {
176 return false;
177 }
178 if (needs_merge) {
179 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
180 }
181 return !needs_merge;
182 }
183
TryCancelUpdate(bool * needs_merge)184 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
185 *needs_merge = false;
186
187 auto file = LockExclusive();
188 if (!file) return false;
189
190 UpdateState state = ReadUpdateState(file.get());
191 if (state == UpdateState::None) return true;
192
193 if (state == UpdateState::Initiated) {
194 LOG(INFO) << "Update has been initiated, now canceling";
195 return RemoveAllUpdateState(file.get());
196 }
197
198 if (state == UpdateState::Unverified) {
199 // We completed an update, but it can still be canceled if we haven't booted into it.
200 auto slot = GetCurrentSlot();
201 if (slot != Slot::Target) {
202 LOG(INFO) << "Canceling previously completed updates (if any)";
203 return RemoveAllUpdateState(file.get());
204 }
205 }
206 *needs_merge = true;
207 return true;
208 }
209
ReadUpdateSourceSlotSuffix()210 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
211 auto boot_file = GetSnapshotBootIndicatorPath();
212 std::string contents;
213 if (!android::base::ReadFileToString(boot_file, &contents)) {
214 PLOG(WARNING) << "Cannot read " << boot_file;
215 return {};
216 }
217 return contents;
218 }
219
GetCurrentSlot()220 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
221 auto contents = ReadUpdateSourceSlotSuffix();
222 if (contents.empty()) {
223 return Slot::Unknown;
224 }
225 if (device_->GetSlotSuffix() == contents) {
226 return Slot::Source;
227 }
228 return Slot::Target;
229 }
230
GetSnapshotSlotSuffix()231 std::string SnapshotManager::GetSnapshotSlotSuffix() {
232 switch (GetCurrentSlot()) {
233 case Slot::Target:
234 return device_->GetSlotSuffix();
235 default:
236 return device_->GetOtherSlotSuffix();
237 }
238 }
239
RemoveFileIfExists(const std::string & path)240 static bool RemoveFileIfExists(const std::string& path) {
241 std::string message;
242 if (!android::base::RemoveFileIfExists(path, &message)) {
243 LOG(ERROR) << "Remove failed: " << path << ": " << message;
244 return false;
245 }
246 return true;
247 }
248
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)249 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
250 if (prolog && !prolog()) {
251 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
252 return false;
253 }
254
255 LOG(INFO) << "Removing all update state.";
256
257 if (!RemoveAllSnapshots(lock)) {
258 LOG(ERROR) << "Could not remove all snapshots";
259 return false;
260 }
261
262 // It's okay if these fail:
263 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
264 // reading the indicator file, so it's not a problem if it still exists
265 // after the update completes.
266 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
267 // matches the incoming update.
268 std::vector<std::string> files = {
269 GetSnapshotBootIndicatorPath(),
270 GetRollbackIndicatorPath(),
271 GetForwardMergeIndicatorPath(),
272 GetOldPartitionMetadataPath(),
273 };
274 for (const auto& file : files) {
275 RemoveFileIfExists(file);
276 }
277
278 // If this fails, we'll keep trying to remove the update state (as the
279 // device reboots or starts a new update) until it finally succeeds.
280 return WriteUpdateState(lock, UpdateState::None);
281 }
282
FinishedSnapshotWrites(bool wipe)283 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
284 auto lock = LockExclusive();
285 if (!lock) return false;
286
287 auto update_state = ReadUpdateState(lock.get());
288 if (update_state == UpdateState::Unverified) {
289 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
290 return true;
291 }
292
293 if (update_state != UpdateState::Initiated) {
294 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
295 return false;
296 }
297
298 if (!EnsureNoOverflowSnapshot(lock.get())) {
299 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
300 return false;
301 }
302
303 if (!UpdateForwardMergeIndicator(wipe)) {
304 return false;
305 }
306
307 // This file is written on boot to detect whether a rollback occurred. It
308 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
309 // snapshots too early.
310 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
311 return false;
312 }
313
314 // This file acts as both a quick indicator for init (it can use access(2)
315 // to decide how to do first-stage mounts), and it stores the old slot, so
316 // we can tell whether or not we performed a rollback.
317 auto contents = device_->GetSlotSuffix();
318 auto boot_file = GetSnapshotBootIndicatorPath();
319 if (!WriteStringToFileAtomic(contents, boot_file)) {
320 PLOG(ERROR) << "write failed: " << boot_file;
321 return false;
322 }
323 return WriteUpdateState(lock.get(), UpdateState::Unverified);
324 }
325
CreateSnapshot(LockedFile * lock,PartitionCowCreator * cow_creator,SnapshotStatus * status)326 bool SnapshotManager::CreateSnapshot(LockedFile* lock, PartitionCowCreator* cow_creator,
327 SnapshotStatus* status) {
328 CHECK(lock);
329 CHECK(lock->lock_mode() == LOCK_EX);
330 CHECK(status);
331
332 if (status->name().empty()) {
333 LOG(ERROR) << "SnapshotStatus has no name.";
334 return false;
335 }
336 // Check these sizes. Like liblp, we guarantee the partition size is
337 // respected, which means it has to be sector-aligned. (This guarantee is
338 // useful for locating avb footers correctly). The COW file size, however,
339 // can be arbitrarily larger than specified, so we can safely round it up.
340 if (status->device_size() % kSectorSize != 0) {
341 LOG(ERROR) << "Snapshot " << status->name()
342 << " device size is not a multiple of the sector size: "
343 << status->device_size();
344 return false;
345 }
346 if (status->snapshot_size() % kSectorSize != 0) {
347 LOG(ERROR) << "Snapshot " << status->name()
348 << " snapshot size is not a multiple of the sector size: "
349 << status->snapshot_size();
350 return false;
351 }
352 if (status->cow_partition_size() % kSectorSize != 0) {
353 LOG(ERROR) << "Snapshot " << status->name()
354 << " cow partition size is not a multiple of the sector size: "
355 << status->cow_partition_size();
356 return false;
357 }
358 if (status->cow_file_size() % kSectorSize != 0) {
359 LOG(ERROR) << "Snapshot " << status->name()
360 << " cow file size is not a multiple of the sector size: "
361 << status->cow_file_size();
362 return false;
363 }
364
365 status->set_state(SnapshotState::CREATED);
366 status->set_sectors_allocated(0);
367 status->set_metadata_sectors(0);
368 status->set_compression_enabled(cow_creator->compression_enabled);
369 status->set_compression_algorithm(cow_creator->compression_algorithm);
370
371 if (!WriteSnapshotStatus(lock, *status)) {
372 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
373 return false;
374 }
375 return true;
376 }
377
CreateCowImage(LockedFile * lock,const std::string & name)378 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
379 CHECK(lock);
380 CHECK(lock->lock_mode() == LOCK_EX);
381 if (!EnsureImageManager()) return Return::Error();
382
383 SnapshotStatus status;
384 if (!ReadSnapshotStatus(lock, name, &status)) {
385 return Return::Error();
386 }
387
388 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
389 if (status.cow_file_size() % kSectorSize != 0) {
390 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
391 << status.cow_file_size();
392 return Return::Error();
393 }
394
395 std::string cow_image_name = GetCowImageDeviceName(name);
396 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
397 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
398 }
399
MapDmUserCow(LockedFile * lock,const std::string & name,const std::string & cow_file,const std::string & base_device,const std::chrono::milliseconds & timeout_ms,std::string * path)400 bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
401 const std::string& cow_file, const std::string& base_device,
402 const std::chrono::milliseconds& timeout_ms, std::string* path) {
403 CHECK(lock);
404
405 auto& dm = DeviceMapper::Instance();
406
407 // Use an extra decoration for first-stage init, so we can transition
408 // to a new table entry in second-stage.
409 std::string misc_name = name;
410 if (use_first_stage_snapuserd_) {
411 misc_name += "-init";
412 }
413
414 if (!EnsureSnapuserdConnected()) {
415 return false;
416 }
417
418 uint64_t base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
419 if (base_sectors == 0) {
420 LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
421 return false;
422 }
423
424 DmTable table;
425 table.Emplace<DmTargetUser>(0, base_sectors, misc_name);
426 if (!dm.CreateDevice(name, table, path, timeout_ms)) {
427 return false;
428 }
429 if (!WaitForDevice(*path, timeout_ms)) {
430 return false;
431 }
432
433 auto control_device = "/dev/dm-user/" + misc_name;
434 if (!WaitForDevice(control_device, timeout_ms)) {
435 return false;
436 }
437
438 return snapuserd_client_->AttachDmUser(misc_name);
439 }
440
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)441 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
442 const std::string& base_device, const std::string& cow_device,
443 const std::chrono::milliseconds& timeout_ms,
444 std::string* dev_path) {
445 CHECK(lock);
446
447 SnapshotStatus status;
448 if (!ReadSnapshotStatus(lock, name, &status)) {
449 return false;
450 }
451 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
452 LOG(ERROR) << "Should not create a snapshot device for " << name
453 << " after merging has completed.";
454 return false;
455 }
456
457 // Validate the block device size, as well as the requested snapshot size.
458 // Note that during first-stage init, we don't have the device paths.
459 if (android::base::StartsWith(base_device, "/")) {
460 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
461 if (fd < 0) {
462 PLOG(ERROR) << "open failed: " << base_device;
463 return false;
464 }
465 auto dev_size = get_block_device_size(fd);
466 if (!dev_size) {
467 PLOG(ERROR) << "Could not determine block device size: " << base_device;
468 return false;
469 }
470 if (status.device_size() != dev_size) {
471 LOG(ERROR) << "Block device size for " << base_device << " does not match"
472 << "(expected " << status.device_size() << ", got " << dev_size << ")";
473 return false;
474 }
475 }
476 if (status.device_size() % kSectorSize != 0) {
477 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
478 return false;
479 }
480 if (status.snapshot_size() % kSectorSize != 0 ||
481 status.snapshot_size() > status.device_size()) {
482 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
483 return false;
484 }
485 if (status.device_size() != status.snapshot_size()) {
486 LOG(ERROR) << "Device size and snapshot size must be the same (device size = "
487 << status.device_size() << ", snapshot size = " << status.snapshot_size();
488 return false;
489 }
490
491 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
492
493 auto& dm = DeviceMapper::Instance();
494
495 // Note that merging is a global state. We do track whether individual devices
496 // have completed merging, but the start of the merge process is considered
497 // atomic.
498 SnapshotStorageMode mode;
499 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
500 switch (update_status.state()) {
501 case UpdateState::MergeCompleted:
502 case UpdateState::MergeNeedsReboot:
503 LOG(ERROR) << "Should not create a snapshot device for " << name
504 << " after global merging has completed.";
505 return false;
506 case UpdateState::Merging:
507 case UpdateState::MergeFailed:
508 // Note: MergeFailed indicates that a merge is in progress, but
509 // is possibly stalled. We still have to honor the merge.
510 if (DecideMergePhase(status) == update_status.merge_phase()) {
511 mode = SnapshotStorageMode::Merge;
512 } else {
513 mode = SnapshotStorageMode::Persistent;
514 }
515 break;
516 default:
517 mode = SnapshotStorageMode::Persistent;
518 break;
519 }
520
521 DmTable table;
522 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
523 kSnapshotChunkSize);
524 if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
525 LOG(ERROR) << "Could not create snapshot device: " << name;
526 return false;
527 }
528 return true;
529 }
530
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)531 std::optional<std::string> SnapshotManager::MapCowImage(
532 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
533 if (!EnsureImageManager()) return std::nullopt;
534 auto cow_image_name = GetCowImageDeviceName(name);
535
536 bool ok;
537 std::string cow_dev;
538 if (device_->IsRecovery() || device_->IsFirstStageInit()) {
539 const auto& opener = device_->GetPartitionOpener();
540 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
541 } else {
542 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
543 }
544
545 if (ok) {
546 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
547 return cow_dev;
548 }
549 LOG(ERROR) << "Could not map image device: " << cow_image_name;
550 return std::nullopt;
551 }
552
MapSourceDevice(LockedFile * lock,const std::string & name,const std::chrono::milliseconds & timeout_ms,std::string * path)553 bool SnapshotManager::MapSourceDevice(LockedFile* lock, const std::string& name,
554 const std::chrono::milliseconds& timeout_ms,
555 std::string* path) {
556 CHECK(lock);
557
558 auto metadata = ReadOldPartitionMetadata(lock);
559 if (!metadata) {
560 LOG(ERROR) << "Could not map source device due to missing or corrupt metadata";
561 return false;
562 }
563
564 auto old_name = GetOtherPartitionName(name);
565 auto slot_suffix = device_->GetSlotSuffix();
566 auto slot = SlotNumberForSlotSuffix(slot_suffix);
567
568 CreateLogicalPartitionParams params = {
569 .block_device = device_->GetSuperDevice(slot),
570 .metadata = metadata,
571 .partition_name = old_name,
572 .timeout_ms = timeout_ms,
573 .device_name = GetSourceDeviceName(name),
574 .partition_opener = &device_->GetPartitionOpener(),
575 };
576 if (!CreateLogicalPartition(std::move(params), path)) {
577 LOG(ERROR) << "Could not create source device for snapshot " << name;
578 return false;
579 }
580 return true;
581 }
582
UnmapSnapshot(LockedFile * lock,const std::string & name)583 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
584 CHECK(lock);
585
586 if (!DeleteDeviceIfExists(name)) {
587 LOG(ERROR) << "Could not delete snapshot device: " << name;
588 return false;
589 }
590 return true;
591 }
592
UnmapCowImage(const std::string & name)593 bool SnapshotManager::UnmapCowImage(const std::string& name) {
594 if (!EnsureImageManager()) return false;
595 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
596 }
597
DeleteSnapshot(LockedFile * lock,const std::string & name)598 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
599 CHECK(lock);
600 CHECK(lock->lock_mode() == LOCK_EX);
601 if (!EnsureImageManager()) return false;
602
603 if (!UnmapCowDevices(lock, name)) {
604 return false;
605 }
606
607 // We can't delete snapshots in recovery. The only way we'd try is it we're
608 // completing or canceling a merge in preparation for a data wipe, in which
609 // case, we don't care if the file sticks around.
610 if (device_->IsRecovery()) {
611 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
612 return true;
613 }
614
615 auto cow_image_name = GetCowImageDeviceName(name);
616 if (images_->BackingImageExists(cow_image_name)) {
617 if (!images_->DeleteBackingImage(cow_image_name)) {
618 return false;
619 }
620 }
621
622 std::string error;
623 auto file_path = GetSnapshotStatusFilePath(name);
624 if (!android::base::RemoveFileIfExists(file_path, &error)) {
625 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
626 return false;
627 }
628 return true;
629 }
630
InitiateMerge()631 bool SnapshotManager::InitiateMerge() {
632 auto lock = LockExclusive();
633 if (!lock) return false;
634
635 UpdateState state = ReadUpdateState(lock.get());
636 if (state != UpdateState::Unverified) {
637 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
638 return false;
639 }
640
641 auto slot = GetCurrentSlot();
642 if (slot != Slot::Target) {
643 LOG(ERROR) << "Device cannot merge while not booting from new slot";
644 return false;
645 }
646
647 std::vector<std::string> snapshots;
648 if (!ListSnapshots(lock.get(), &snapshots)) {
649 LOG(ERROR) << "Could not list snapshots";
650 return false;
651 }
652
653 auto other_suffix = device_->GetOtherSlotSuffix();
654
655 auto& dm = DeviceMapper::Instance();
656 for (const auto& snapshot : snapshots) {
657 if (android::base::EndsWith(snapshot, other_suffix)) {
658 // Allow the merge to continue, but log this unexpected case.
659 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
660 continue;
661 }
662
663 // The device has to be mapped, since everything should be merged at
664 // the same time. This is a fairly serious error. We could forcefully
665 // map everything here, but it should have been mapped during first-
666 // stage init.
667 if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
668 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
669 return false;
670 }
671 }
672
673 auto metadata = ReadCurrentMetadata();
674 for (auto it = snapshots.begin(); it != snapshots.end();) {
675 switch (GetMetadataPartitionState(*metadata, *it)) {
676 case MetadataPartitionState::Flashed:
677 LOG(WARNING) << "Detected re-flashing for partition " << *it
678 << ". Skip merging it.";
679 [[fallthrough]];
680 case MetadataPartitionState::None: {
681 LOG(WARNING) << "Deleting snapshot for partition " << *it;
682 if (!DeleteSnapshot(lock.get(), *it)) {
683 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
684 << ". Skip merging it anyways.";
685 }
686 it = snapshots.erase(it);
687 } break;
688 case MetadataPartitionState::Updated: {
689 ++it;
690 } break;
691 }
692 }
693
694 bool compression_enabled = false;
695
696 std::vector<std::string> first_merge_group;
697
698 DmTargetSnapshot::Status initial_target_values = {};
699 for (const auto& snapshot : snapshots) {
700 DmTargetSnapshot::Status current_status;
701 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
702 return false;
703 }
704 initial_target_values.sectors_allocated += current_status.sectors_allocated;
705 initial_target_values.total_sectors += current_status.total_sectors;
706 initial_target_values.metadata_sectors += current_status.metadata_sectors;
707
708 SnapshotStatus snapshot_status;
709 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
710 return false;
711 }
712
713 compression_enabled |= snapshot_status.compression_enabled();
714 if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
715 first_merge_group.emplace_back(snapshot);
716 }
717 }
718
719 SnapshotUpdateStatus initial_status = ReadSnapshotUpdateStatus(lock.get());
720 initial_status.set_state(UpdateState::Merging);
721 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
722 initial_status.set_total_sectors(initial_target_values.total_sectors);
723 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
724 initial_status.set_compression_enabled(compression_enabled);
725
726 // If any partitions shrunk, we need to merge them before we merge any other
727 // partitions (see b/177935716). Otherwise, a merge from another partition
728 // may overwrite the source block of a copy operation.
729 const std::vector<std::string>* merge_group;
730 if (first_merge_group.empty()) {
731 merge_group = &snapshots;
732 initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
733 } else {
734 merge_group = &first_merge_group;
735 initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
736 }
737
738 // Point of no return - mark that we're starting a merge. From now on every
739 // eligible snapshot must be a merge target.
740 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
741 return false;
742 }
743
744 auto reported_code = MergeFailureCode::Ok;
745 for (const auto& snapshot : *merge_group) {
746 // If this fails, we have no choice but to continue. Everything must
747 // be merged. This is not an ideal state to be in, but it is safe,
748 // because we the next boot will try again.
749 auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
750 if (code != MergeFailureCode::Ok) {
751 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
752 if (reported_code == MergeFailureCode::Ok) {
753 reported_code = code;
754 }
755 }
756 }
757
758 // If we couldn't switch everything to a merge target, pre-emptively mark
759 // this merge as failed. It will get acknowledged when WaitForMerge() is
760 // called.
761 if (reported_code != MergeFailureCode::Ok) {
762 WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
763 }
764
765 // Return true no matter what, because a merge was initiated.
766 return true;
767 }
768
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)769 MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
770 SnapshotStatus status;
771 if (!ReadSnapshotStatus(lock, name, &status)) {
772 return MergeFailureCode::ReadStatus;
773 }
774 if (status.state() != SnapshotState::CREATED) {
775 LOG(WARNING) << "Snapshot " << name
776 << " has unexpected state: " << SnapshotState_Name(status.state());
777 }
778
779 // After this, we return true because we technically did switch to a merge
780 // target. Everything else we do here is just informational.
781 if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
782 return code;
783 }
784
785 status.set_state(SnapshotState::MERGING);
786
787 DmTargetSnapshot::Status dm_status;
788 if (!QuerySnapshotStatus(name, nullptr, &dm_status)) {
789 LOG(ERROR) << "Could not query merge status for snapshot: " << name;
790 }
791 status.set_sectors_allocated(dm_status.sectors_allocated);
792 status.set_metadata_sectors(dm_status.metadata_sectors);
793 if (!WriteSnapshotStatus(lock, status)) {
794 LOG(ERROR) << "Could not update status file for snapshot: " << name;
795 }
796 return MergeFailureCode::Ok;
797 }
798
RewriteSnapshotDeviceTable(const std::string & name)799 MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
800 auto& dm = DeviceMapper::Instance();
801
802 std::vector<DeviceMapper::TargetInfo> old_targets;
803 if (!dm.GetTableInfo(name, &old_targets)) {
804 LOG(ERROR) << "Could not read snapshot device table: " << name;
805 return MergeFailureCode::GetTableInfo;
806 }
807 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
808 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
809 return MergeFailureCode::UnknownTable;
810 }
811
812 std::string base_device, cow_device;
813 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
814 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
815 return MergeFailureCode::GetTableParams;
816 }
817
818 DmTable table;
819 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
820 SnapshotStorageMode::Merge, kSnapshotChunkSize);
821 if (!dm.LoadTableAndActivate(name, table)) {
822 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
823 return MergeFailureCode::ActivateNewTable;
824 }
825 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
826 return MergeFailureCode::Ok;
827 }
828
// Selects which device-mapper query GetSingleTarget() performs.
enum class TableQuery {
    // Query via DeviceMapper::GetTableInfo().
    Table,
    // Query via DeviceMapper::GetTableStatus().
    Status,
};
833
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)834 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
835 DeviceMapper::TargetInfo* target) {
836 auto& dm = DeviceMapper::Instance();
837 if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
838 return false;
839 }
840
841 std::vector<DeviceMapper::TargetInfo> targets;
842 bool result;
843 if (query == TableQuery::Status) {
844 result = dm.GetTableStatus(dm_name, &targets);
845 } else {
846 result = dm.GetTableInfo(dm_name, &targets);
847 }
848 if (!result) {
849 LOG(ERROR) << "Could not query device: " << dm_name;
850 return false;
851 }
852 if (targets.size() != 1) {
853 return false;
854 }
855
856 *target = std::move(targets[0]);
857 return true;
858 }
859
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)860 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
861 DeviceMapper::TargetInfo snap_target;
862 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
863 return false;
864 }
865 auto type = DeviceMapper::GetTargetType(snap_target.spec);
866 if (type != "snapshot" && type != "snapshot-merge") {
867 return false;
868 }
869 if (target) {
870 *target = std::move(snap_target);
871 }
872 return true;
873 }
874
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)875 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
876 DmTargetSnapshot::Status* status) {
877 DeviceMapper::TargetInfo target;
878 if (!IsSnapshotDevice(dm_name, &target)) {
879 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
880 return false;
881 }
882 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
883 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
884 return false;
885 }
886 if (target_type) {
887 *target_type = DeviceMapper::GetTargetType(target.spec);
888 }
889 return true;
890 }
891
892 // Note that when a merge fails, we will *always* try again to complete the
893 // merge each time the device boots. There is no harm in doing so, and if
894 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)895 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
896 const std::function<bool()>& before_cancel) {
897 while (true) {
898 auto result = CheckMergeState(before_cancel);
899 LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
900
901 if (result.state == UpdateState::MergeFailed) {
902 AcknowledgeMergeFailure(result.failure_code);
903 }
904 if (result.state != UpdateState::Merging) {
905 // Either there is no merge, or the merge was finished, so no need
906 // to keep waiting.
907 return result.state;
908 }
909
910 if (callback && !callback()) {
911 return result.state;
912 }
913
914 // This wait is not super time sensitive, so we have a relatively
915 // low polling frequency.
916 std::this_thread::sleep_for(kUpdateStateCheckInterval);
917 }
918 }
919
CheckMergeState(const std::function<bool ()> & before_cancel)920 auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
921 auto lock = LockExclusive();
922 if (!lock) {
923 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
924 }
925
926 auto result = CheckMergeState(lock.get(), before_cancel);
927 LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
928
929 if (result.state == UpdateState::MergeCompleted) {
930 // Do this inside the same lock. Failures get acknowledged without the
931 // lock, because flock() might have failed.
932 AcknowledgeMergeSuccess(lock.get());
933 } else if (result.state == UpdateState::Cancelled) {
934 if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
935 LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
936 }
937 }
938 return result;
939 }
940
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)941 auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
942 -> MergeResult {
943 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
944 switch (update_status.state()) {
945 case UpdateState::None:
946 case UpdateState::MergeCompleted:
947 // Harmless races are allowed between two callers of WaitForMerge,
948 // so in both of these cases we just propagate the state.
949 return MergeResult(update_status.state());
950
951 case UpdateState::Merging:
952 case UpdateState::MergeNeedsReboot:
953 case UpdateState::MergeFailed:
954 // We'll poll each snapshot below. Note that for the NeedsReboot
955 // case, we always poll once to give cleanup another opportunity to
956 // run.
957 break;
958
959 case UpdateState::Unverified:
960 // This is an edge case. Normally cancelled updates are detected
961 // via the merge poll below, but if we never started a merge, we
962 // need to also check here.
963 if (HandleCancelledUpdate(lock, before_cancel)) {
964 return MergeResult(UpdateState::Cancelled);
965 }
966 return MergeResult(update_status.state());
967
968 default:
969 return MergeResult(update_status.state());
970 }
971
972 std::vector<std::string> snapshots;
973 if (!ListSnapshots(lock, &snapshots)) {
974 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
975 }
976
977 auto other_suffix = device_->GetOtherSlotSuffix();
978
979 bool cancelled = false;
980 bool merging = false;
981 bool needs_reboot = false;
982 bool wrong_phase = false;
983 MergeFailureCode failure_code = MergeFailureCode::Ok;
984 for (const auto& snapshot : snapshots) {
985 if (android::base::EndsWith(snapshot, other_suffix)) {
986 // This will have triggered an error message in InitiateMerge already.
987 LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
988 continue;
989 }
990
991 auto result = CheckTargetMergeState(lock, snapshot, update_status);
992 LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
993
994 switch (result.state) {
995 case UpdateState::MergeFailed:
996 // Take the first failure code in case other failures compound.
997 if (failure_code == MergeFailureCode::Ok) {
998 failure_code = result.failure_code;
999 }
1000 break;
1001 case UpdateState::Merging:
1002 merging = true;
1003 break;
1004 case UpdateState::MergeNeedsReboot:
1005 needs_reboot = true;
1006 break;
1007 case UpdateState::MergeCompleted:
1008 break;
1009 case UpdateState::Cancelled:
1010 cancelled = true;
1011 break;
1012 case UpdateState::None:
1013 wrong_phase = true;
1014 break;
1015 default:
1016 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
1017 << "\"" << result.state << "\"";
1018 if (failure_code == MergeFailureCode::Ok) {
1019 failure_code = MergeFailureCode::UnexpectedMergeState;
1020 }
1021 break;
1022 }
1023 }
1024
1025 if (merging) {
1026 // Note that we handle "Merging" before we handle anything else. We
1027 // want to poll until *nothing* is merging if we can, so everything has
1028 // a chance to get marked as completed or failed.
1029 return MergeResult(UpdateState::Merging);
1030 }
1031 if (failure_code != MergeFailureCode::Ok) {
1032 // Note: since there are many drop-out cases for failure, we acknowledge
1033 // it in WaitForMerge rather than here and elsewhere.
1034 return MergeResult(UpdateState::MergeFailed, failure_code);
1035 }
1036 if (wrong_phase) {
1037 // If we got here, no other partitions are being merged, and nothing
1038 // failed to merge. It's safe to move to the next merge phase.
1039 auto code = MergeSecondPhaseSnapshots(lock);
1040 if (code != MergeFailureCode::Ok) {
1041 return MergeResult(UpdateState::MergeFailed, code);
1042 }
1043 return MergeResult(UpdateState::Merging);
1044 }
1045 if (needs_reboot) {
1046 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
1047 return MergeResult(UpdateState::MergeNeedsReboot);
1048 }
1049 if (cancelled) {
1050 // This is an edge case, that we handle as correctly as we sensibly can.
1051 // The underlying partition has changed behind update_engine, and we've
1052 // removed the snapshot as a result. The exact state of the update is
1053 // undefined now, but this can only happen on an unlocked device where
1054 // partitions can be flashed without wiping userdata.
1055 return MergeResult(UpdateState::Cancelled);
1056 }
1057 return MergeResult(UpdateState::MergeCompleted);
1058 }
1059
CheckTargetMergeState(LockedFile * lock,const std::string & name,const SnapshotUpdateStatus & update_status)1060 auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
1061 const SnapshotUpdateStatus& update_status)
1062 -> MergeResult {
1063 SnapshotStatus snapshot_status;
1064 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
1065 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
1066 }
1067
1068 std::unique_ptr<LpMetadata> current_metadata;
1069
1070 if (!IsSnapshotDevice(name)) {
1071 if (!current_metadata) {
1072 current_metadata = ReadCurrentMetadata();
1073 }
1074
1075 if (!current_metadata ||
1076 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
1077 DeleteSnapshot(lock, name);
1078 return MergeResult(UpdateState::Cancelled);
1079 }
1080
1081 // During a check, we decided the merge was complete, but we were unable to
1082 // collapse the device-mapper stack and perform COW cleanup. If we haven't
1083 // rebooted after this check, the device will still be a snapshot-merge
1084 // target. If we have rebooted, the device will now be a linear target,
1085 // and we can try cleanup again.
1086 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1087 // NB: It's okay if this fails now, we gave cleanup our best effort.
1088 OnSnapshotMergeComplete(lock, name, snapshot_status);
1089 return MergeResult(UpdateState::MergeCompleted);
1090 }
1091
1092 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
1093 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
1094 }
1095
1096 // This check is expensive so it is only enabled for debugging.
1097 DCHECK((current_metadata = ReadCurrentMetadata()) &&
1098 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
1099
1100 std::string target_type;
1101 DmTargetSnapshot::Status status;
1102 if (!QuerySnapshotStatus(name, &target_type, &status)) {
1103 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
1104 }
1105 if (target_type == "snapshot" &&
1106 DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
1107 update_status.merge_phase() == MergePhase::FIRST_PHASE) {
1108 // The snapshot is not being merged because it's in the wrong phase.
1109 return MergeResult(UpdateState::None);
1110 }
1111 if (target_type != "snapshot-merge") {
1112 // We can get here if we failed to rewrite the target type in
1113 // InitiateMerge(). If we failed to create the target in first-stage
1114 // init, boot would not succeed.
1115 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
1116 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
1117 }
1118
1119 // These two values are equal when merging is complete.
1120 if (status.sectors_allocated != status.metadata_sectors) {
1121 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
1122 LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
1123 return MergeResult(UpdateState::MergeFailed,
1124 MergeFailureCode::UnmergedSectorsAfterCompletion);
1125 }
1126 return MergeResult(UpdateState::Merging);
1127 }
1128
1129 auto code = CheckMergeConsistency(lock, name, snapshot_status);
1130 if (code != MergeFailureCode::Ok) {
1131 return MergeResult(UpdateState::MergeFailed, code);
1132 }
1133
1134 // Merging is done. First, update the status file to indicate the merge
1135 // is complete. We do this before calling OnSnapshotMergeComplete, even
1136 // though this means the write is potentially wasted work (since in the
1137 // ideal case we'll immediately delete the file).
1138 //
1139 // This makes it simpler to reason about the next reboot: no matter what
1140 // part of cleanup failed, first-stage init won't try to create another
1141 // snapshot device for this partition.
1142 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1143 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1144 return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
1145 }
1146 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1147 return MergeResult(UpdateState::MergeNeedsReboot);
1148 }
1149 return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
1150 }
1151
1152 // This returns the backing device, not the dm-user layer.
GetMappedCowDeviceName(const std::string & snapshot,const SnapshotStatus & status)1153 static std::string GetMappedCowDeviceName(const std::string& snapshot,
1154 const SnapshotStatus& status) {
1155 // If no partition was created (the COW exists entirely on /data), the
1156 // device-mapper layering is different than if we had a partition.
1157 if (status.cow_partition_size() == 0) {
1158 return GetCowImageDeviceName(snapshot);
1159 }
1160 return GetCowName(snapshot);
1161 }
1162
CheckMergeConsistency(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1163 MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
1164 const SnapshotStatus& status) {
1165 CHECK(lock);
1166
1167 if (!status.compression_enabled()) {
1168 // Do not try to verify old-style COWs yet.
1169 return MergeFailureCode::Ok;
1170 }
1171
1172 auto& dm = DeviceMapper::Instance();
1173
1174 std::string cow_image_name = GetMappedCowDeviceName(name, status);
1175 std::string cow_image_path;
1176 if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
1177 LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
1178 return MergeFailureCode::GetCowPathConsistencyCheck;
1179 }
1180
1181 // First pass, count # of ops.
1182 size_t num_ops = 0;
1183 {
1184 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
1185 if (fd < 0) {
1186 PLOG(ERROR) << "Failed to open " << cow_image_name;
1187 return MergeFailureCode::OpenCowConsistencyCheck;
1188 }
1189
1190 CowReader reader;
1191 if (!reader.Parse(std::move(fd))) {
1192 LOG(ERROR) << "Failed to parse cow " << cow_image_path;
1193 return MergeFailureCode::ParseCowConsistencyCheck;
1194 }
1195
1196 for (auto iter = reader.GetOpIter(); !iter->Done(); iter->Next()) {
1197 if (!IsMetadataOp(iter->Get())) {
1198 num_ops++;
1199 }
1200 }
1201 }
1202
1203 // Second pass, try as hard as we can to get the actual number of blocks
1204 // the system thinks is merged.
1205 unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
1206 if (fd < 0) {
1207 PLOG(ERROR) << "Failed to open direct " << cow_image_name;
1208 return MergeFailureCode::OpenCowDirectConsistencyCheck;
1209 }
1210
1211 void* addr;
1212 size_t page_size = getpagesize();
1213 if (posix_memalign(&addr, page_size, page_size) < 0) {
1214 PLOG(ERROR) << "posix_memalign with page size " << page_size;
1215 return MergeFailureCode::MemAlignConsistencyCheck;
1216 }
1217
1218 // COWs are always at least 2MB, this is guaranteed in snapshot creation.
1219 std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
1220 if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
1221 PLOG(ERROR) << "Direct read failed " << cow_image_name;
1222 return MergeFailureCode::DirectReadConsistencyCheck;
1223 }
1224
1225 auto header = reinterpret_cast<CowHeader*>(buffer.get());
1226 if (header->num_merge_ops != num_ops) {
1227 LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
1228 << "but " << header->num_merge_ops << " were actually recorded.";
1229 LOG(ERROR) << "Aborting merge progress for snapshot " << name
1230 << ", will try again next boot";
1231 return MergeFailureCode::WrongMergeCountConsistencyCheck;
1232 }
1233
1234 return MergeFailureCode::Ok;
1235 }
1236
MergeSecondPhaseSnapshots(LockedFile * lock)1237 MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
1238 std::vector<std::string> snapshots;
1239 if (!ListSnapshots(lock, &snapshots)) {
1240 return MergeFailureCode::ListSnapshots;
1241 }
1242
1243 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1244 CHECK(update_status.state() == UpdateState::Merging);
1245 CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);
1246
1247 update_status.set_merge_phase(MergePhase::SECOND_PHASE);
1248 if (!WriteSnapshotUpdateStatus(lock, update_status)) {
1249 return MergeFailureCode::WriteStatus;
1250 }
1251
1252 MergeFailureCode result = MergeFailureCode::Ok;
1253 for (const auto& snapshot : snapshots) {
1254 SnapshotStatus snapshot_status;
1255 if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
1256 return MergeFailureCode::ReadStatus;
1257 }
1258 if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
1259 continue;
1260 }
1261 auto code = SwitchSnapshotToMerge(lock, snapshot);
1262 if (code != MergeFailureCode::Ok) {
1263 LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
1264 if (result == MergeFailureCode::Ok) {
1265 result = code;
1266 }
1267 }
1268 }
1269 return result;
1270 }
1271
GetSnapshotBootIndicatorPath()1272 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1273 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1274 }
1275
GetRollbackIndicatorPath()1276 std::string SnapshotManager::GetRollbackIndicatorPath() {
1277 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1278 }
1279
GetForwardMergeIndicatorPath()1280 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1281 return metadata_dir_ + "/allow-forward-merge";
1282 }
1283
GetOldPartitionMetadataPath()1284 std::string SnapshotManager::GetOldPartitionMetadataPath() {
1285 return metadata_dir_ + "/old-partition-metadata";
1286 }
1287
AcknowledgeMergeSuccess(LockedFile * lock)1288 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1289 // It's not possible to remove update state in recovery, so write an
1290 // indicator that cleanup is needed on reboot. If a factory data reset
1291 // was requested, it doesn't matter, everything will get wiped anyway.
1292 // To make testing easier we consider a /data wipe as cleaned up.
1293 if (device_->IsRecovery()) {
1294 WriteUpdateState(lock, UpdateState::MergeCompleted);
1295 return;
1296 }
1297
1298 RemoveAllUpdateState(lock);
1299 }
1300
AcknowledgeMergeFailure(MergeFailureCode failure_code)1301 void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
1302 // Log first, so worst case, we always have a record of why the calls below
1303 // were being made.
1304 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1305
1306 auto lock = LockExclusive();
1307 if (!lock) return;
1308
1309 // Since we released the lock in between WaitForMerge and here, it's
1310 // possible (1) the merge successfully completed or (2) was already
1311 // marked as a failure. So make sure to check the state again, and
1312 // only mark as a failure if appropriate.
1313 UpdateState state = ReadUpdateState(lock.get());
1314 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1315 return;
1316 }
1317
1318 WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
1319 }
1320
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1321 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1322 const SnapshotStatus& status) {
1323 if (IsSnapshotDevice(name)) {
1324 // We are extra-cautious here, to avoid deleting the wrong table.
1325 std::string target_type;
1326 DmTargetSnapshot::Status dm_status;
1327 if (!QuerySnapshotStatus(name, &target_type, &dm_status)) {
1328 return false;
1329 }
1330 if (target_type != "snapshot-merge") {
1331 LOG(ERROR) << "Unexpected target type " << target_type
1332 << " for snapshot device: " << name;
1333 return false;
1334 }
1335 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1336 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << name;
1337 return false;
1338 }
1339 if (!CollapseSnapshotDevice(name, status)) {
1340 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1341 return false;
1342 }
1343 // Note that collapsing is implicitly an Unmap, so we don't need to
1344 // unmap the snapshot.
1345 }
1346
1347 if (!DeleteSnapshot(lock, name)) {
1348 LOG(ERROR) << "Could not delete snapshot: " << name;
1349 return false;
1350 }
1351 return true;
1352 }
1353
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1354 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1355 const SnapshotStatus& status) {
1356 auto& dm = DeviceMapper::Instance();
1357
1358 // Verify we have a snapshot-merge device.
1359 DeviceMapper::TargetInfo target;
1360 if (!GetSingleTarget(name, TableQuery::Table, &target)) {
1361 return false;
1362 }
1363 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1364 // This should be impossible, it was checked earlier.
1365 LOG(ERROR) << "Snapshot device has invalid target type: " << name;
1366 return false;
1367 }
1368
1369 std::string base_device, cow_device;
1370 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1371 LOG(ERROR) << "Could not parse snapshot device " << name << " parameters: " << target.data;
1372 return false;
1373 }
1374
1375 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1376 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1377 LOG(ERROR) << "Snapshot " << name
1378 << " size is not sector aligned: " << status.snapshot_size();
1379 return false;
1380 }
1381
1382 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1383 // Create a DmTable that is identical to the base device.
1384 CreateLogicalPartitionParams base_device_params{
1385 .block_device = device_->GetSuperDevice(slot),
1386 .metadata_slot = slot,
1387 .partition_name = name,
1388 .partition_opener = &device_->GetPartitionOpener(),
1389 };
1390 DmTable table;
1391 if (!CreateDmTable(base_device_params, &table)) {
1392 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1393 return false;
1394 }
1395
1396 if (!dm.LoadTableAndActivate(name, table)) {
1397 return false;
1398 }
1399
1400 // Attempt to delete the snapshot device if one still exists. Nothing
1401 // should be depending on the device, and device-mapper should have
1402 // flushed remaining I/O. We could in theory replace with dm-zero (or
1403 // re-use the table above), but for now it's better to know why this
1404 // would fail.
1405 if (status.compression_enabled()) {
1406 UnmapDmUserDevice(name);
1407 }
1408 auto base_name = GetBaseDeviceName(name);
1409 if (!DeleteDeviceIfExists(base_name)) {
1410 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1411 }
1412
1413 if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
1414 LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
1415 }
1416
1417 return true;
1418 }
1419
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1420 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1421 const std::function<bool()>& before_cancel) {
1422 auto slot = GetCurrentSlot();
1423 if (slot == Slot::Unknown) {
1424 return false;
1425 }
1426
1427 // If all snapshots were reflashed, then cancel the entire update.
1428 if (AreAllSnapshotsCancelled(lock)) {
1429 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1430 return RemoveAllUpdateState(lock, before_cancel);
1431 }
1432
1433 // If update has been rolled back, then cancel the entire update.
1434 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1435 // when ProcessUpdateState() returns UpdateState::Cancelled.
1436 auto current_slot = GetCurrentSlot();
1437 if (current_slot != Slot::Source) {
1438 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1439 << " slot, taking no action.";
1440 return false;
1441 }
1442
1443 // current_slot == Source. Attempt to detect rollbacks.
1444 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1445 // This unverified update is not attempted. Take no action.
1446 PLOG(INFO) << "Rollback indicator not detected. "
1447 << "Update state is being processed before reboot, taking no action.";
1448 return false;
1449 }
1450
1451 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1452 return RemoveAllUpdateState(lock, before_cancel);
1453 }
1454
PerformInitTransition(InitTransition transition,std::vector<std::string> * snapuserd_argv)1455 bool SnapshotManager::PerformInitTransition(InitTransition transition,
1456 std::vector<std::string>* snapuserd_argv) {
1457 LOG(INFO) << "Performing transition for snapuserd.";
1458
1459 // Don't use EnsuerSnapuserdConnected() because this is called from init,
1460 // and attempting to do so will deadlock.
1461 if (!snapuserd_client_ && transition != InitTransition::SELINUX_DETACH) {
1462 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
1463 if (!snapuserd_client_) {
1464 LOG(ERROR) << "Unable to connect to snapuserd";
1465 return false;
1466 }
1467 }
1468
1469 auto& dm = DeviceMapper::Instance();
1470
1471 auto lock = LockExclusive();
1472 if (!lock) return false;
1473
1474 std::vector<std::string> snapshots;
1475 if (!ListSnapshots(lock.get(), &snapshots)) {
1476 LOG(ERROR) << "Failed to list snapshots.";
1477 return false;
1478 }
1479
1480 size_t num_cows = 0;
1481 size_t ok_cows = 0;
1482 for (const auto& snapshot : snapshots) {
1483 std::string user_cow_name = GetDmUserCowName(snapshot);
1484 if (dm.GetState(user_cow_name) == DmDeviceState::INVALID) {
1485 continue;
1486 }
1487
1488 DeviceMapper::TargetInfo target;
1489 if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
1490 continue;
1491 }
1492
1493 auto target_type = DeviceMapper::GetTargetType(target.spec);
1494 if (target_type != "user") {
1495 LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
1496 continue;
1497 }
1498
1499 num_cows++;
1500
1501 SnapshotStatus snapshot_status;
1502 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
1503 LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
1504 continue;
1505 }
1506
1507 auto misc_name = user_cow_name;
1508
1509 DmTable table;
1510 table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
1511 if (!dm.LoadTableAndActivate(user_cow_name, table)) {
1512 LOG(ERROR) << "Unable to swap tables for " << misc_name;
1513 continue;
1514 }
1515
1516 std::string source_device;
1517 if (!dm.GetDmDevicePathByName(GetSourceDeviceName(snapshot), &source_device)) {
1518 LOG(ERROR) << "Could not get device path for " << GetSourceDeviceName(snapshot);
1519 continue;
1520 }
1521
1522 std::string cow_image_name = GetMappedCowDeviceName(snapshot, snapshot_status);
1523
1524 std::string cow_image_device;
1525 if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
1526 LOG(ERROR) << "Could not get device path for " << cow_image_name;
1527 continue;
1528 }
1529
1530 // Wait for ueventd to acknowledge and create the control device node.
1531 std::string control_device = "/dev/dm-user/" + misc_name;
1532 if (!WaitForDevice(control_device, 10s)) {
1533 LOG(ERROR) << "dm-user control device no found: " << misc_name;
1534 continue;
1535 }
1536
1537 if (transition == InitTransition::SELINUX_DETACH) {
1538 auto message = misc_name + "," + cow_image_device + "," + source_device;
1539 snapuserd_argv->emplace_back(std::move(message));
1540
1541 // Do not attempt to connect to the new snapuserd yet, it hasn't
1542 // been started. We do however want to wait for the misc device
1543 // to have been created.
1544 ok_cows++;
1545 continue;
1546 }
1547
1548 uint64_t base_sectors =
1549 snapuserd_client_->InitDmUserCow(misc_name, cow_image_device, source_device);
1550 if (base_sectors == 0) {
1551 // Unrecoverable as metadata reads from cow device failed
1552 LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
1553 return false;
1554 }
1555
1556 CHECK(base_sectors <= target.spec.length);
1557
1558 if (!snapuserd_client_->AttachDmUser(misc_name)) {
1559 // This error is unrecoverable. We cannot proceed because reads to
1560 // the underlying device will fail.
1561 LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
1562 return false;
1563 }
1564
1565 ok_cows++;
1566 }
1567
1568 if (ok_cows != num_cows) {
1569 LOG(ERROR) << "Could not transition all snapuserd consumers.";
1570 return false;
1571 }
1572 return true;
1573 }
1574
ReadCurrentMetadata()1575 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1576 const auto& opener = device_->GetPartitionOpener();
1577 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1578 auto super_device = device_->GetSuperDevice(slot);
1579 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1580 if (!metadata) {
1581 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1582 return nullptr;
1583 }
1584 return metadata;
1585 }
1586
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1587 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1588 const LpMetadata& metadata, const std::string& name) {
1589 auto partition = android::fs_mgr::FindPartition(metadata, name);
1590 if (!partition) return MetadataPartitionState::None;
1591 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1592 return MetadataPartitionState::Updated;
1593 }
1594 return MetadataPartitionState::Flashed;
1595 }
1596
AreAllSnapshotsCancelled(LockedFile * lock)1597 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1598 std::vector<std::string> snapshots;
1599 if (!ListSnapshots(lock, &snapshots)) {
1600 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1601 << "after applying an update. Assuming no snapshots.";
1602 // Let HandleCancelledUpdate resets UpdateState.
1603 return true;
1604 }
1605
1606 std::map<std::string, bool> flashing_status;
1607
1608 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1609 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1610 << "removing update states.";
1611 return false;
1612 }
1613
1614 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1615 [](const auto& pair) { return pair.second; });
1616
1617 if (all_snapshots_cancelled) {
1618 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1619 }
1620 return all_snapshots_cancelled;
1621 }
1622
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1623 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1624 const std::vector<std::string>& snapshots,
1625 std::map<std::string, bool>* out) {
1626 CHECK(lock);
1627
1628 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1629 if (source_slot_suffix.empty()) {
1630 return false;
1631 }
1632 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1633 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1634
1635 // Attempt to detect re-flashing on each partition.
1636 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1637 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1638 // deleted. Caller is responsible for merging the rest of the snapshots.
1639 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1640 //
1641 // Note that we use target slot metadata, since if an OTA has been applied
1642 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1643 // operation against dynamic partitions ensures that all copies of the
1644 // metadata are in sync, so flashing all partitions on the source slot will
1645 // remove the UPDATED flag on the target slot as well.
1646 const auto& opener = device_->GetPartitionOpener();
1647 auto super_device = device_->GetSuperDevice(target_slot);
1648 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1649 if (!metadata) {
1650 return false;
1651 }
1652
1653 for (const auto& snapshot_name : snapshots) {
1654 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1655 MetadataPartitionState::Updated) {
1656 out->emplace(snapshot_name, false);
1657 } else {
1658 // Delete snapshots for partitions that are re-flashed after the update.
1659 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1660 out->emplace(snapshot_name, true);
1661 }
1662 }
1663 return true;
1664 }
1665
RemoveAllSnapshots(LockedFile * lock)1666 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1667 std::vector<std::string> snapshots;
1668 if (!ListSnapshots(lock, &snapshots)) {
1669 LOG(ERROR) << "Could not list snapshots";
1670 return false;
1671 }
1672
1673 std::map<std::string, bool> flashing_status;
1674 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1675 LOG(WARNING) << "Failed to get flashing status";
1676 }
1677
1678 auto current_slot = GetCurrentSlot();
1679 bool ok = true;
1680 bool has_mapped_cow_images = false;
1681 for (const auto& name : snapshots) {
1682 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1683 // If boot indicator is missing, update state is None or Initiated, so
1684 // it is also okay to unmap and delete all the snapshots.
1685 // If booting off target slot,
1686 // - should not unmap because:
1687 // - In Android mode, snapshots are not mapped, but
1688 // filesystems are mounting off dm-linear targets directly.
1689 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1690 // - If partition is flashed or unknown, it is okay to delete snapshots.
1691 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1692 // as dm-snapshot (for example, after merge completes).
1693 bool should_unmap = current_slot != Slot::Target;
1694 bool should_delete = ShouldDeleteSnapshot(flashing_status, current_slot, name);
1695 if (should_unmap && android::base::EndsWith(name, device_->GetSlotSuffix())) {
1696 // Something very unexpected has happened - we want to unmap this
1697 // snapshot, but it's on the wrong slot. We can't unmap an active
1698 // partition. If this is not really a snapshot, skip the unmap
1699 // step.
1700 auto& dm = DeviceMapper::Instance();
1701 if (dm.GetState(name) == DmDeviceState::INVALID || !IsSnapshotDevice(name)) {
1702 LOG(ERROR) << "Detected snapshot " << name << " on " << current_slot << " slot"
1703 << " for source partition; removing without unmap.";
1704 should_unmap = false;
1705 }
1706 }
1707
1708 bool partition_ok = true;
1709 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1710 partition_ok = false;
1711 }
1712 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1713 partition_ok = false;
1714 }
1715
1716 if (!partition_ok) {
1717 // Remember whether or not we were able to unmap the cow image.
1718 auto cow_image_device = GetCowImageDeviceName(name);
1719 has_mapped_cow_images |=
1720 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1721
1722 ok = false;
1723 }
1724 }
1725
1726 if (ok || !has_mapped_cow_images) {
1727 // Delete any image artifacts as a precaution, in case an update is
1728 // being cancelled due to some corrupted state in an lp_metadata file.
1729 // Note that we do not do this if some cow images are still mapped,
1730 // since we must not remove backing storage if it's in use.
1731 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1732 LOG(ERROR) << "Could not remove all snapshot artifacts";
1733 return false;
1734 }
1735 }
1736 return ok;
1737 }
1738
1739 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1740 bool SnapshotManager::ShouldDeleteSnapshot(const std::map<std::string, bool>& flashing_status,
1741 Slot current_slot, const std::string& name) {
1742 if (current_slot != Slot::Target) {
1743 return true;
1744 }
1745 auto it = flashing_status.find(name);
1746 if (it == flashing_status.end()) {
1747 LOG(WARNING) << "Can't determine flashing status for " << name;
1748 return true;
1749 }
1750 if (it->second) {
1751 // partition flashed, okay to delete obsolete snapshots
1752 return true;
1753 }
1754 return !IsSnapshotDevice(name);
1755 }
1756
GetUpdateState(double * progress)1757 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1758 // If we've never started an update, the state file won't exist.
1759 auto state_file = GetStateFilePath();
1760 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1761 return UpdateState::None;
1762 }
1763
1764 auto lock = LockShared();
1765 if (!lock) {
1766 return UpdateState::None;
1767 }
1768
1769 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1770 auto state = update_status.state();
1771 if (progress == nullptr) {
1772 return state;
1773 }
1774
1775 if (state == UpdateState::MergeCompleted) {
1776 *progress = 100.0;
1777 return state;
1778 }
1779
1780 *progress = 0.0;
1781 if (state != UpdateState::Merging) {
1782 return state;
1783 }
1784
1785 // Sum all the snapshot states as if the system consists of a single huge
1786 // snapshots device, then compute the merge completion percentage of that
1787 // device.
1788 std::vector<std::string> snapshots;
1789 if (!ListSnapshots(lock.get(), &snapshots)) {
1790 LOG(ERROR) << "Could not list snapshots";
1791 return state;
1792 }
1793
1794 DmTargetSnapshot::Status fake_snapshots_status = {};
1795 for (const auto& snapshot : snapshots) {
1796 DmTargetSnapshot::Status current_status;
1797
1798 if (!IsSnapshotDevice(snapshot)) continue;
1799 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
1800
1801 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1802 fake_snapshots_status.total_sectors += current_status.total_sectors;
1803 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1804 }
1805
1806 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1807 update_status.sectors_allocated());
1808
1809 return state;
1810 }
1811
UpdateUsesCompression()1812 bool SnapshotManager::UpdateUsesCompression() {
1813 auto lock = LockShared();
1814 if (!lock) return false;
1815 return UpdateUsesCompression(lock.get());
1816 }
1817
UpdateUsesCompression(LockedFile * lock)1818 bool SnapshotManager::UpdateUsesCompression(LockedFile* lock) {
1819 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
1820 return update_status.compression_enabled();
1821 }
1822
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots,const std::string & suffix)1823 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
1824 const std::string& suffix) {
1825 CHECK(lock);
1826
1827 auto dir_path = metadata_dir_ + "/snapshots"s;
1828 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1829 if (!dir) {
1830 PLOG(ERROR) << "opendir failed: " << dir_path;
1831 return false;
1832 }
1833
1834 struct dirent* dp;
1835 while ((dp = readdir(dir.get())) != nullptr) {
1836 if (dp->d_type != DT_REG) continue;
1837
1838 std::string name(dp->d_name);
1839 if (!suffix.empty() && !android::base::EndsWith(name, suffix)) {
1840 continue;
1841 }
1842 snapshots->emplace_back(std::move(name));
1843 }
1844 return true;
1845 }
1846
IsSnapshotManagerNeeded()1847 bool SnapshotManager::IsSnapshotManagerNeeded() {
1848 return access(kBootIndicatorPath, F_OK) == 0;
1849 }
1850
GetGlobalRollbackIndicatorPath()1851 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1852 return kRollbackIndicatorPath;
1853 }
1854
NeedSnapshotsInFirstStageMount()1855 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1856 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1857 // will create devices that look like the old slot, except with extra
1858 // content at the end of each device. This will confuse dm-verity, and
1859 // ultimately we'll fail to boot. Why not make it a fatal error and have
1860 // the reason be clearer? Because the indicator file still exists, and
1861 // if this was FATAL, reverting to the old slot would be broken.
1862 auto slot = GetCurrentSlot();
1863
1864 if (slot != Slot::Target) {
1865 if (slot == Slot::Source) {
1866 // Device is rebooting into the original slot, so mark this as a
1867 // rollback.
1868 auto path = GetRollbackIndicatorPath();
1869 if (!android::base::WriteStringToFile("1", path)) {
1870 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1871 } else {
1872 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1873 }
1874 }
1875 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1876 return false;
1877 }
1878
1879 // If we can't read the update state, it's unlikely anything else will
1880 // succeed, so this is a fatal error. We'll eventually exhaust boot
1881 // attempts and revert to the old slot.
1882 auto lock = LockShared();
1883 if (!lock) {
1884 LOG(FATAL) << "Could not read update state to determine snapshot status";
1885 return false;
1886 }
1887 switch (ReadUpdateState(lock.get())) {
1888 case UpdateState::Unverified:
1889 case UpdateState::Merging:
1890 case UpdateState::MergeFailed:
1891 return true;
1892 default:
1893 return false;
1894 }
1895 }
1896
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1897 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1898 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1899 LOG(INFO) << "Creating logical partitions with snapshots as needed";
1900
1901 auto lock = LockExclusive();
1902 if (!lock) return false;
1903
1904 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1905 return MapAllPartitions(lock.get(), super_device, slot, timeout_ms);
1906 }
1907
MapAllPartitions(LockedFile * lock,const std::string & super_device,uint32_t slot,const std::chrono::milliseconds & timeout_ms)1908 bool SnapshotManager::MapAllPartitions(LockedFile* lock, const std::string& super_device,
1909 uint32_t slot, const std::chrono::milliseconds& timeout_ms) {
1910 const auto& opener = device_->GetPartitionOpener();
1911 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1912 if (!metadata) {
1913 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1914 return false;
1915 }
1916
1917 if (!EnsureImageManager()) {
1918 return false;
1919 }
1920
1921 for (const auto& partition : metadata->partitions) {
1922 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1923 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1924 << kCowGroupName;
1925 continue;
1926 }
1927
1928 CreateLogicalPartitionParams params = {
1929 .block_device = super_device,
1930 .metadata = metadata.get(),
1931 .partition = &partition,
1932 .partition_opener = &opener,
1933 .timeout_ms = timeout_ms,
1934 };
1935 if (!MapPartitionWithSnapshot(lock, std::move(params), SnapshotContext::Mount, nullptr)) {
1936 return false;
1937 }
1938 }
1939
1940 LOG(INFO) << "Created logical partitions with snapshot.";
1941 return true;
1942 }
1943
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1944 static std::chrono::milliseconds GetRemainingTime(
1945 const std::chrono::milliseconds& timeout,
1946 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1947 // If no timeout is specified, execute all commands without specifying any timeout.
1948 if (timeout.count() == 0) return std::chrono::milliseconds(0);
1949 auto passed_time = std::chrono::steady_clock::now() - begin;
1950 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1951 if (remaining_time.count() <= 0) {
1952 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1953 << remaining_time.count() << "ms remaining)";
1954 // Return min() instead of remaining_time here because 0 is treated as a special value for
1955 // no timeout, where the rest of the commands will still be executed.
1956 return std::chrono::milliseconds::min();
1957 }
1958 return remaining_time;
1959 }
1960
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,SnapshotContext context,SnapshotPaths * paths)1961 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1962 CreateLogicalPartitionParams params,
1963 SnapshotContext context, SnapshotPaths* paths) {
1964 auto begin = std::chrono::steady_clock::now();
1965
1966 CHECK(lock);
1967
1968 if (params.GetPartitionName() != params.GetDeviceName()) {
1969 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1970 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1971 return false;
1972 }
1973
1974 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1975 // reading super partition metadata).
1976 CreateLogicalPartitionParams::OwnedData params_owned_data;
1977 if (!params.InitDefaults(¶ms_owned_data)) {
1978 return false;
1979 }
1980
1981 if (!params.partition->num_extents) {
1982 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1983 return true; // leave path empty to indicate that nothing is mapped.
1984 }
1985
1986 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1987 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
1988 // completed, live_snapshot_status is set to nullopt.
1989 std::optional<SnapshotStatus> live_snapshot_status;
1990 do {
1991 if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1992 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1993 << params.GetPartitionName();
1994 break;
1995 }
1996 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1997 if (access(file_path.c_str(), F_OK) != 0) {
1998 if (errno != ENOENT) {
1999 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
2000 << ": Can't access " << file_path;
2001 return false;
2002 }
2003 break;
2004 }
2005 live_snapshot_status = std::make_optional<SnapshotStatus>();
2006 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
2007 return false;
2008 }
2009 // No live snapshot if merge is completed.
2010 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
2011 live_snapshot_status.reset();
2012 }
2013
2014 if (live_snapshot_status->state() == SnapshotState::NONE ||
2015 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
2016 0) {
2017 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
2018 << " is invalid, ignoring: state = "
2019 << SnapshotState_Name(live_snapshot_status->state())
2020 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
2021 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
2022 live_snapshot_status.reset();
2023 }
2024 } while (0);
2025
2026 if (live_snapshot_status.has_value()) {
2027 // dm-snapshot requires the base device to be writable.
2028 params.force_writable = true;
2029 // Map the base device with a different name to avoid collision.
2030 params.device_name = GetBaseDeviceName(params.GetPartitionName());
2031 }
2032
2033 AutoDeviceList created_devices;
2034
2035 // Create the base device for the snapshot, or if there is no snapshot, the
2036 // device itself. This device consists of the real blocks in the super
2037 // partition that this logical partition occupies.
2038 auto& dm = DeviceMapper::Instance();
2039 std::string base_path;
2040 if (!CreateLogicalPartition(params, &base_path)) {
2041 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
2042 << " as device " << params.GetDeviceName();
2043 return false;
2044 }
2045 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
2046
2047 if (paths) {
2048 paths->target_device = base_path;
2049 }
2050
2051 if (!live_snapshot_status.has_value()) {
2052 created_devices.Release();
2053 return true;
2054 }
2055
2056 // We don't have ueventd in first-stage init, so use device major:minor
2057 // strings instead.
2058 std::string base_device;
2059 if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
2060 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
2061 return false;
2062 }
2063
2064 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
2065 if (remaining_time.count() < 0) return false;
2066
2067 std::string cow_name;
2068 CreateLogicalPartitionParams cow_params = params;
2069 cow_params.timeout_ms = remaining_time;
2070 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
2071 return false;
2072 }
2073 std::string cow_device;
2074 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
2075 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
2076 return false;
2077 }
2078 if (paths) {
2079 paths->cow_device_name = cow_name;
2080 }
2081
2082 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2083 if (remaining_time.count() < 0) return false;
2084
2085 if (context == SnapshotContext::Update && live_snapshot_status->compression_enabled()) {
2086 // Stop here, we can't run dm-user yet, the COW isn't built.
2087 created_devices.Release();
2088 return true;
2089 }
2090
2091 if (live_snapshot_status->compression_enabled()) {
2092 // Get the source device (eg the view of the partition from before it was resized).
2093 std::string source_device_path;
2094 if (!MapSourceDevice(lock, params.GetPartitionName(), remaining_time,
2095 &source_device_path)) {
2096 LOG(ERROR) << "Could not map source device for: " << cow_name;
2097 return false;
2098 }
2099
2100 auto source_device = GetSourceDeviceName(params.GetPartitionName());
2101 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, source_device);
2102
2103 if (!WaitForDevice(source_device_path, remaining_time)) {
2104 return false;
2105 }
2106
2107 std::string cow_path;
2108 if (!GetMappedImageDevicePath(cow_name, &cow_path)) {
2109 LOG(ERROR) << "Could not determine path for: " << cow_name;
2110 return false;
2111 }
2112 if (!WaitForDevice(cow_path, remaining_time)) {
2113 return false;
2114 }
2115
2116 auto name = GetDmUserCowName(params.GetPartitionName());
2117
2118 std::string new_cow_device;
2119 if (!MapDmUserCow(lock, name, cow_path, source_device_path, remaining_time,
2120 &new_cow_device)) {
2121 LOG(ERROR) << "Could not map dm-user device for partition "
2122 << params.GetPartitionName();
2123 return false;
2124 }
2125 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, name);
2126
2127 remaining_time = GetRemainingTime(params.timeout_ms, begin);
2128 if (remaining_time.count() < 0) return false;
2129
2130 cow_device = new_cow_device;
2131 }
2132
2133 std::string path;
2134 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
2135 &path)) {
2136 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
2137 return false;
2138 }
2139 // No need to add params.GetPartitionName() to created_devices since it is immediately released.
2140
2141 if (paths) {
2142 paths->snapshot_device = path;
2143 }
2144
2145 created_devices.Release();
2146
2147 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << path;
2148 return true;
2149 }
2150
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)2151 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
2152 const std::string& target_partition_name) {
2153 CHECK(lock);
2154
2155 if (!UnmapSnapshot(lock, target_partition_name)) {
2156 return false;
2157 }
2158
2159 if (!UnmapCowDevices(lock, target_partition_name)) {
2160 return false;
2161 }
2162
2163 auto base_name = GetBaseDeviceName(target_partition_name);
2164 if (!DeleteDeviceIfExists(base_name)) {
2165 LOG(ERROR) << "Cannot delete base device: " << base_name;
2166 return false;
2167 }
2168
2169 auto source_name = GetSourceDeviceName(target_partition_name);
2170 if (!DeleteDeviceIfExists(source_name)) {
2171 LOG(ERROR) << "Cannot delete source device: " << source_name;
2172 return false;
2173 }
2174
2175 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
2176
2177 return true;
2178 }
2179
// Maps the COW storage for the partition named in |params|, as described by |snapshot_status|.
//
// - If the status has a COW file (cow_file_size() > 0), the COW image is mapped first. If
//   there is no COW partition (cow_partition_size() == 0), the image alone is the COW device
//   and |*cow_name| is set to the image device name.
// - Otherwise a device-mapper device named GetCowName(partition) is created from the COW
//   partition's table, with the COW image (if any) appended as a final linear extent.
//
// Every device mapped here is appended to |created_devices|. |*cow_name| is assigned early, so
// it may be modified even when the function fails.
//
// Returns true on success; false on any mapping failure or when the timeout in |params| is
// reached. Aborts (CHECK) if |lock| is null or both COW sizes are zero.
bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
                                    const SnapshotStatus& snapshot_status,
                                    AutoDeviceList* created_devices, std::string* cow_name) {
    CHECK(lock);
    CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
    // The timeout in |params| is measured from this point.
    auto begin = std::chrono::steady_clock::now();

    std::string partition_name = params.GetPartitionName();
    std::string cow_image_name = GetCowImageDeviceName(partition_name);
    *cow_name = GetCowName(partition_name);

    auto& dm = DeviceMapper::Instance();

    // Map COW image if necessary.
    if (snapshot_status.cow_file_size() > 0) {
        if (!EnsureImageManager()) return false;
        // A negative remaining time means the timeout has been reached.
        auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
        if (remaining_time.count() < 0) return false;

        if (!MapCowImage(partition_name, remaining_time).has_value()) {
            LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
            return false;
        }
        created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);

        // If no COW partition exists, just return the image alone.
        if (snapshot_status.cow_partition_size() == 0) {
            *cow_name = std::move(cow_image_name);
            LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
            return true;
        }
    }

    auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
    if (remaining_time.count() < 0) return false;

    // Guaranteed here: either there is no COW file (so the first CHECK implies a COW
    // partition), or the image-only case has already returned above.
    CHECK(snapshot_status.cow_partition_size() > 0);

    // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
    // COW image device as the last extent.
    CreateLogicalPartitionParams cow_partition_params = params;
    // Clear the partition pointer and device name copied from |params| and name the COW
    // partition instead, so that the table is built for the COW partition.
    cow_partition_params.partition = nullptr;
    cow_partition_params.partition_name = *cow_name;
    cow_partition_params.device_name.clear();
    DmTable table;
    if (!CreateDmTable(cow_partition_params, &table)) {
        return false;
    }
    // If the COW image exists, append it as the last extent.
    if (snapshot_status.cow_file_size() > 0) {
        std::string cow_image_device;
        if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
            LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
            return false;
        }
        // Sizes in the snapshot status are converted to sectors (kSectorSize units).
        auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
        auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
        table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
                                      0);
    }

    // We have created the DmTable now. Map it.
    std::string cow_path;
    if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
        LOG(ERROR) << "Could not create COW device: " << *cow_name;
        return false;
    }
    created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
    LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
    return true;
}
2251
UnmapCowDevices(LockedFile * lock,const std::string & name)2252 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
2253 CHECK(lock);
2254 if (!EnsureImageManager()) return false;
2255
2256 if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
2257 return false;
2258 }
2259
2260 if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
2261 LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
2262 return false;
2263 }
2264
2265 std::string cow_image_name = GetCowImageDeviceName(name);
2266 if (!images_->UnmapImageIfExists(cow_image_name)) {
2267 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
2268 return false;
2269 }
2270 return true;
2271 }
2272
UnmapDmUserDevice(const std::string & snapshot_name)2273 bool SnapshotManager::UnmapDmUserDevice(const std::string& snapshot_name) {
2274 auto& dm = DeviceMapper::Instance();
2275
2276 auto dm_user_name = GetDmUserCowName(snapshot_name);
2277 if (dm.GetState(dm_user_name) == DmDeviceState::INVALID) {
2278 return true;
2279 }
2280
2281 if (!DeleteDeviceIfExists(dm_user_name)) {
2282 LOG(ERROR) << "Cannot unmap " << dm_user_name;
2283 return false;
2284 }
2285
2286 if (EnsureSnapuserdConnected()) {
2287 if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
2288 LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
2289 return false;
2290 }
2291 }
2292
2293 // Ensure the control device is gone so we don't run into ABA problems.
2294 auto control_device = "/dev/dm-user/" + dm_user_name;
2295 if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
2296 LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
2297 return false;
2298 }
2299 return true;
2300 }
2301
MapAllSnapshots(const std::chrono::milliseconds & timeout_ms)2302 bool SnapshotManager::MapAllSnapshots(const std::chrono::milliseconds& timeout_ms) {
2303 auto lock = LockExclusive();
2304 if (!lock) return false;
2305
2306 auto state = ReadUpdateState(lock.get());
2307 if (state == UpdateState::Unverified) {
2308 if (GetCurrentSlot() == Slot::Target) {
2309 LOG(ERROR) << "Cannot call MapAllSnapshots when booting from the target slot.";
2310 return false;
2311 }
2312 } else if (state != UpdateState::Initiated) {
2313 LOG(ERROR) << "Cannot call MapAllSnapshots from update state: " << state;
2314 return false;
2315 }
2316
2317 std::vector<std::string> snapshots;
2318 if (!ListSnapshots(lock.get(), &snapshots)) {
2319 return false;
2320 }
2321
2322 const auto& opener = device_->GetPartitionOpener();
2323 auto slot_suffix = device_->GetOtherSlotSuffix();
2324 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2325 auto super_device = device_->GetSuperDevice(slot_number);
2326 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot_number);
2327 if (!metadata) {
2328 LOG(ERROR) << "MapAllSnapshots could not read dynamic partition metadata for device: "
2329 << super_device;
2330 return false;
2331 }
2332
2333 for (const auto& snapshot : snapshots) {
2334 if (!UnmapPartitionWithSnapshot(lock.get(), snapshot)) {
2335 LOG(ERROR) << "MapAllSnapshots could not unmap snapshot: " << snapshot;
2336 return false;
2337 }
2338
2339 CreateLogicalPartitionParams params = {
2340 .block_device = super_device,
2341 .metadata = metadata.get(),
2342 .partition_name = snapshot,
2343 .partition_opener = &opener,
2344 .timeout_ms = timeout_ms,
2345 };
2346 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), SnapshotContext::Mount,
2347 nullptr)) {
2348 LOG(ERROR) << "MapAllSnapshots failed to map: " << snapshot;
2349 return false;
2350 }
2351 }
2352
2353 LOG(INFO) << "MapAllSnapshots succeeded.";
2354 return true;
2355 }
2356
UnmapAllSnapshots()2357 bool SnapshotManager::UnmapAllSnapshots() {
2358 auto lock = LockExclusive();
2359 if (!lock) return false;
2360
2361 return UnmapAllSnapshots(lock.get());
2362 }
2363
UnmapAllSnapshots(LockedFile * lock)2364 bool SnapshotManager::UnmapAllSnapshots(LockedFile* lock) {
2365 std::vector<std::string> snapshots;
2366 if (!ListSnapshots(lock, &snapshots)) {
2367 return false;
2368 }
2369
2370 for (const auto& snapshot : snapshots) {
2371 if (!UnmapPartitionWithSnapshot(lock, snapshot)) {
2372 LOG(ERROR) << "Failed to unmap snapshot: " << snapshot;
2373 return false;
2374 }
2375 }
2376
2377 // Terminate the daemon and release the snapuserd_client_ object.
2378 // If we need to re-connect with the daemon, EnsureSnapuserdConnected()
2379 // will re-create the object and establish the socket connection.
2380 if (snapuserd_client_) {
2381 LOG(INFO) << "Shutdown snapuserd daemon";
2382 snapuserd_client_->DetachSnapuserd();
2383 snapuserd_client_->CloseConnection();
2384 snapuserd_client_ = nullptr;
2385 }
2386
2387 return true;
2388 }
2389
OpenFile(const std::string & file,int lock_flags)2390 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
2391 -> std::unique_ptr<LockedFile> {
2392 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2393 if (fd < 0) {
2394 PLOG(ERROR) << "Open failed: " << file;
2395 return nullptr;
2396 }
2397 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
2398 PLOG(ERROR) << "Acquire flock failed: " << file;
2399 return nullptr;
2400 }
2401 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
2402 // calls, so strip extra flags.
2403 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
2404 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
2405 }
2406
~LockedFile()2407 SnapshotManager::LockedFile::~LockedFile() {
2408 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
2409 PLOG(ERROR) << "Failed to unlock file: " << path_;
2410 }
2411 }
2412
GetStateFilePath() const2413 std::string SnapshotManager::GetStateFilePath() const {
2414 return metadata_dir_ + "/state"s;
2415 }
2416
GetMergeStateFilePath() const2417 std::string SnapshotManager::GetMergeStateFilePath() const {
2418 return metadata_dir_ + "/merge_state"s;
2419 }
2420
GetLockPath() const2421 std::string SnapshotManager::GetLockPath() const {
2422 return metadata_dir_;
2423 }
2424
OpenLock(int lock_flags)2425 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
2426 auto lock_file = GetLockPath();
2427 return OpenFile(lock_file, lock_flags);
2428 }
2429
LockShared()2430 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
2431 return OpenLock(LOCK_SH);
2432 }
2433
LockExclusive()2434 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
2435 return OpenLock(LOCK_EX);
2436 }
2437
UpdateStateFromString(const std::string & contents)2438 static UpdateState UpdateStateFromString(const std::string& contents) {
2439 if (contents.empty() || contents == "none") {
2440 return UpdateState::None;
2441 } else if (contents == "initiated") {
2442 return UpdateState::Initiated;
2443 } else if (contents == "unverified") {
2444 return UpdateState::Unverified;
2445 } else if (contents == "merging") {
2446 return UpdateState::Merging;
2447 } else if (contents == "merge-completed") {
2448 return UpdateState::MergeCompleted;
2449 } else if (contents == "merge-needs-reboot") {
2450 return UpdateState::MergeNeedsReboot;
2451 } else if (contents == "merge-failed") {
2452 return UpdateState::MergeFailed;
2453 } else if (contents == "cancelled") {
2454 return UpdateState::Cancelled;
2455 } else {
2456 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
2457 return UpdateState::None;
2458 }
2459 }
2460
operator <<(std::ostream & os,UpdateState state)2461 std::ostream& operator<<(std::ostream& os, UpdateState state) {
2462 switch (state) {
2463 case UpdateState::None:
2464 return os << "none";
2465 case UpdateState::Initiated:
2466 return os << "initiated";
2467 case UpdateState::Unverified:
2468 return os << "unverified";
2469 case UpdateState::Merging:
2470 return os << "merging";
2471 case UpdateState::MergeCompleted:
2472 return os << "merge-completed";
2473 case UpdateState::MergeNeedsReboot:
2474 return os << "merge-needs-reboot";
2475 case UpdateState::MergeFailed:
2476 return os << "merge-failed";
2477 case UpdateState::Cancelled:
2478 return os << "cancelled";
2479 default:
2480 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
2481 return os;
2482 }
2483 }
2484
ReadUpdateState(LockedFile * lock)2485 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
2486 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
2487 return status.state();
2488 }
2489
ReadSnapshotUpdateStatus(LockedFile * lock)2490 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
2491 CHECK(lock);
2492
2493 SnapshotUpdateStatus status = {};
2494 std::string contents;
2495 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
2496 PLOG(ERROR) << "Read state file failed";
2497 status.set_state(UpdateState::None);
2498 return status;
2499 }
2500
2501 if (!status.ParseFromString(contents)) {
2502 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
2503
2504 // Try to rollback to legacy file to support devices that are
2505 // currently using the old file format.
2506 // TODO(b/147409432)
2507 status.set_state(UpdateStateFromString(contents));
2508 }
2509
2510 return status;
2511 }
2512
WriteUpdateState(LockedFile * lock,UpdateState state,MergeFailureCode failure_code)2513 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
2514 MergeFailureCode failure_code) {
2515 SnapshotUpdateStatus status;
2516 status.set_state(state);
2517
2518 switch (state) {
2519 case UpdateState::MergeFailed:
2520 status.set_merge_failure_code(failure_code);
2521 break;
2522 case UpdateState::Initiated:
2523 status.set_source_build_fingerprint(
2524 android::base::GetProperty("ro.build.fingerprint", ""));
2525 break;
2526 default:
2527 break;
2528 }
2529
2530 // If we're transitioning between two valid states (eg, we're not beginning
2531 // or ending an OTA), then make sure to propagate the compression bit and
2532 // build fingerprint.
2533 if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
2534 SnapshotUpdateStatus old_status = ReadSnapshotUpdateStatus(lock);
2535 status.set_compression_enabled(old_status.compression_enabled());
2536 status.set_source_build_fingerprint(old_status.source_build_fingerprint());
2537 }
2538 return WriteSnapshotUpdateStatus(lock, status);
2539 }
2540
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)2541 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
2542 const SnapshotUpdateStatus& status) {
2543 CHECK(lock);
2544 CHECK(lock->lock_mode() == LOCK_EX);
2545
2546 std::string contents;
2547 if (!status.SerializeToString(&contents)) {
2548 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
2549 return false;
2550 }
2551
2552 #ifdef LIBSNAPSHOT_USE_HAL
2553 auto merge_status = MergeStatus::UNKNOWN;
2554 switch (status.state()) {
2555 // The needs-reboot and completed cases imply that /data and /metadata
2556 // can be safely wiped, so we don't report a merge status.
2557 case UpdateState::None:
2558 case UpdateState::MergeNeedsReboot:
2559 case UpdateState::MergeCompleted:
2560 case UpdateState::Initiated:
2561 merge_status = MergeStatus::NONE;
2562 break;
2563 case UpdateState::Unverified:
2564 merge_status = MergeStatus::SNAPSHOTTED;
2565 break;
2566 case UpdateState::Merging:
2567 case UpdateState::MergeFailed:
2568 merge_status = MergeStatus::MERGING;
2569 break;
2570 default:
2571 // Note that Cancelled flows to here - it is never written, since
2572 // it only communicates a transient state to the caller.
2573 LOG(ERROR) << "Unexpected update status: " << status.state();
2574 break;
2575 }
2576
2577 bool set_before_write =
2578 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2579 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2580 return false;
2581 }
2582 #endif
2583
2584 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2585 PLOG(ERROR) << "Could not write to state file";
2586 return false;
2587 }
2588
2589 #ifdef LIBSNAPSHOT_USE_HAL
2590 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2591 return false;
2592 }
2593 #endif
2594 return true;
2595 }
2596
GetSnapshotStatusFilePath(const std::string & name)2597 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2598 auto file = metadata_dir_ + "/snapshots/"s + name;
2599 return file;
2600 }
2601
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2602 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2603 SnapshotStatus* status) {
2604 CHECK(lock);
2605 auto path = GetSnapshotStatusFilePath(name);
2606
2607 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2608 if (fd < 0) {
2609 PLOG(ERROR) << "Open failed: " << path;
2610 return false;
2611 }
2612
2613 if (!status->ParseFromFileDescriptor(fd.get())) {
2614 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2615 return false;
2616 }
2617
2618 if (status->name() != name) {
2619 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2620 status->set_name(name);
2621 }
2622
2623 return true;
2624 }
2625
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2626 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2627 // The caller must take an exclusive lock to modify snapshots.
2628 CHECK(lock);
2629 CHECK(lock->lock_mode() == LOCK_EX);
2630 CHECK(!status.name().empty());
2631
2632 auto path = GetSnapshotStatusFilePath(status.name());
2633
2634 std::string content;
2635 if (!status.SerializeToString(&content)) {
2636 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2637 return false;
2638 }
2639
2640 if (!WriteStringToFileAtomic(content, path)) {
2641 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2642 return false;
2643 }
2644
2645 return true;
2646 }
2647
EnsureImageManager()2648 bool SnapshotManager::EnsureImageManager() {
2649 if (images_) return true;
2650
2651 images_ = device_->OpenImageManager();
2652 if (!images_) {
2653 LOG(ERROR) << "Could not open ImageManager";
2654 return false;
2655 }
2656 return true;
2657 }
2658
EnsureSnapuserdConnected()2659 bool SnapshotManager::EnsureSnapuserdConnected() {
2660 if (snapuserd_client_) {
2661 return true;
2662 }
2663
2664 if (!use_first_stage_snapuserd_ && !EnsureSnapuserdStarted()) {
2665 return false;
2666 }
2667
2668 snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
2669 if (!snapuserd_client_) {
2670 LOG(ERROR) << "Unable to connect to snapuserd";
2671 return false;
2672 }
2673 return true;
2674 }
2675
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2676 void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2677 std::vector<std::string> to_delete;
2678 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2679 if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
2680 LOG(WARNING) << existing_cow_partition->name()
2681 << " cannot be unmapped and its space cannot be reclaimed";
2682 continue;
2683 }
2684 to_delete.push_back(existing_cow_partition->name());
2685 }
2686 for (const auto& name : to_delete) {
2687 current_metadata->RemovePartition(name);
2688 }
2689 }
2690
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2691 static Return AddRequiredSpace(Return orig,
2692 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2693 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2694 return orig;
2695 }
2696 uint64_t sum = 0;
2697 for (auto&& [name, status] : all_snapshot_status) {
2698 sum += status.cow_file_size();
2699 }
2700 return Return::NoSpace(sum);
2701 }
2702
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2703 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2704 auto lock = LockExclusive();
2705 if (!lock) return Return::Error();
2706
2707 auto update_state = ReadUpdateState(lock.get());
2708 if (update_state != UpdateState::Initiated) {
2709 LOG(ERROR) << "Cannot create update snapshots in state " << update_state;
2710 return Return::Error();
2711 }
2712
2713 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2714 // partition takes up a big chunk of space in super, causing COW images to be created on
2715 // retrofit Virtual A/B devices.
2716 if (device_->IsOverlayfsSetup()) {
2717 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2718 << ", reboot, then try again.";
2719 return Return::Error();
2720 }
2721
2722 const auto& opener = device_->GetPartitionOpener();
2723 auto current_suffix = device_->GetSlotSuffix();
2724 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2725 auto target_suffix = device_->GetOtherSlotSuffix();
2726 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2727 auto current_super = device_->GetSuperDevice(current_slot);
2728
2729 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2730 if (current_metadata == nullptr) {
2731 LOG(ERROR) << "Cannot create metadata builder.";
2732 return Return::Error();
2733 }
2734
2735 auto target_metadata =
2736 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2737 if (target_metadata == nullptr) {
2738 LOG(ERROR) << "Cannot create target metadata builder.";
2739 return Return::Error();
2740 }
2741
2742 // Delete partitions with target suffix in |current_metadata|. Otherwise,
2743 // partition_cow_creator recognizes these left-over partitions as used space.
2744 for (const auto& group_name : current_metadata->ListGroups()) {
2745 if (android::base::EndsWith(group_name, target_suffix)) {
2746 current_metadata->RemoveGroupAndPartitions(group_name);
2747 }
2748 }
2749
2750 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2751 if (!metadata_updater.Update()) {
2752 LOG(ERROR) << "Cannot calculate new metadata.";
2753 return Return::Error();
2754 }
2755
2756 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2757 // free regions.
2758 UnmapAndDeleteCowPartition(current_metadata.get());
2759
2760 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2761 // devices with retrofit dynamic partitions does not make sense.
2762 // This ensures that current_metadata->GetFreeRegions() uses the same device
2763 // indices as target_metadata (i.e. 0 -> "super").
2764 // This is also assumed in MapCowDevices() call below.
2765 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2766 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2767
2768 std::map<std::string, SnapshotStatus> all_snapshot_status;
2769
2770 // In case of error, automatically delete devices that are created along the way.
2771 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2772 // these devices.
2773 AutoDeviceList created_devices;
2774
2775 const auto& dap_metadata = manifest.dynamic_partition_metadata();
2776 CowOptions options;
2777 CowWriter writer(options);
2778 bool cow_format_support = true;
2779 if (dap_metadata.cow_version() < writer.GetCowVersion()) {
2780 cow_format_support = false;
2781 }
2782
2783 LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
2784 << " writer.GetCowVersion(): " << writer.GetCowVersion();
2785
2786 bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
2787 !device_->IsRecovery() && cow_format_support;
2788
2789 std::string compression_algorithm;
2790 if (use_compression) {
2791 compression_algorithm = dap_metadata.vabc_compression_param();
2792 if (compression_algorithm.empty()) {
2793 // Older OTAs don't set an explicit compression type, so default to gz.
2794 compression_algorithm = "gz";
2795 }
2796 } else {
2797 compression_algorithm = "none";
2798 }
2799
2800 PartitionCowCreator cow_creator{
2801 .target_metadata = target_metadata.get(),
2802 .target_suffix = target_suffix,
2803 .target_partition = nullptr,
2804 .current_metadata = current_metadata.get(),
2805 .current_suffix = current_suffix,
2806 .update = nullptr,
2807 .extra_extents = {},
2808 .compression_enabled = use_compression,
2809 .compression_algorithm = compression_algorithm,
2810 };
2811
2812 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2813 &all_snapshot_status);
2814 if (!ret.is_ok()) return ret;
2815
2816 auto exported_target_metadata = target_metadata->Export();
2817 if (exported_target_metadata == nullptr) {
2818 LOG(ERROR) << "Cannot export target metadata";
2819 return Return::Error();
2820 }
2821
2822 ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2823 exported_target_metadata.get(), target_suffix,
2824 all_snapshot_status);
2825 if (!ret.is_ok()) return ret;
2826
2827 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2828 *exported_target_metadata, target_slot)) {
2829 LOG(ERROR) << "Cannot write target metadata";
2830 return Return::Error();
2831 }
2832
2833 // If compression is enabled, we need to retain a copy of the old metadata
2834 // so we can access original blocks in case they are moved around. We do
2835 // not want to rely on the old super metadata slot because we don't
2836 // guarantee its validity after the slot switch is successful.
2837 if (cow_creator.compression_enabled) {
2838 auto metadata = current_metadata->Export();
2839 if (!metadata) {
2840 LOG(ERROR) << "Could not export current metadata";
2841 return Return::Error();
2842 }
2843
2844 auto path = GetOldPartitionMetadataPath();
2845 if (!android::fs_mgr::WriteToImageFile(path, *metadata.get())) {
2846 LOG(ERROR) << "Cannot write old metadata to " << path;
2847 return Return::Error();
2848 }
2849 }
2850
2851 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
2852 status.set_state(update_state);
2853 status.set_compression_enabled(cow_creator.compression_enabled);
2854 if (!WriteSnapshotUpdateStatus(lock.get(), status)) {
2855 LOG(ERROR) << "Unable to write new update state";
2856 return Return::Error();
2857 }
2858
2859 created_devices.Release();
2860 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2861
2862 return Return::Ok();
2863 }
2864
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2865 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2866 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2867 AutoDeviceList* created_devices,
2868 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2869 CHECK(lock);
2870
2871 auto* target_metadata = cow_creator->target_metadata;
2872 const auto& target_suffix = cow_creator->target_suffix;
2873
2874 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2875 LOG(ERROR) << "Cannot add group " << kCowGroupName;
2876 return Return::Error();
2877 }
2878
2879 std::map<std::string, const PartitionUpdate*> partition_map;
2880 std::map<std::string, std::vector<Extent>> extra_extents_map;
2881 for (const auto& partition_update : manifest.partitions()) {
2882 auto suffixed_name = partition_update.partition_name() + target_suffix;
2883 auto&& [it, inserted] = partition_map.emplace(suffixed_name, &partition_update);
2884 if (!inserted) {
2885 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2886 << " in update manifest.";
2887 return Return::Error();
2888 }
2889
2890 auto& extra_extents = extra_extents_map[suffixed_name];
2891 if (partition_update.has_hash_tree_extent()) {
2892 extra_extents.push_back(partition_update.hash_tree_extent());
2893 }
2894 if (partition_update.has_fec_extent()) {
2895 extra_extents.push_back(partition_update.fec_extent());
2896 }
2897 }
2898
2899 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2900 cow_creator->target_partition = target_partition;
2901 cow_creator->update = nullptr;
2902 auto iter = partition_map.find(target_partition->name());
2903 if (iter != partition_map.end()) {
2904 cow_creator->update = iter->second;
2905 } else {
2906 LOG(INFO) << target_partition->name()
2907 << " isn't included in the payload, skipping the cow creation.";
2908 continue;
2909 }
2910
2911 cow_creator->extra_extents.clear();
2912 auto extra_extents_it = extra_extents_map.find(target_partition->name());
2913 if (extra_extents_it != extra_extents_map.end()) {
2914 cow_creator->extra_extents = std::move(extra_extents_it->second);
2915 }
2916
2917 // Compute the device sizes for the partition.
2918 auto cow_creator_ret = cow_creator->Run();
2919 if (!cow_creator_ret.has_value()) {
2920 LOG(ERROR) << "PartitionCowCreator returned no value for " << target_partition->name();
2921 return Return::Error();
2922 }
2923
2924 LOG(INFO) << "For partition " << target_partition->name()
2925 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2926 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2927 << ", cow partition size = "
2928 << cow_creator_ret->snapshot_status.cow_partition_size()
2929 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2930
2931 // Delete any existing snapshot before re-creating one.
2932 if (!DeleteSnapshot(lock, target_partition->name())) {
2933 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2934 << target_partition->name();
2935 return Return::Error();
2936 }
2937
2938 // It is possible that the whole partition uses free space in super, and snapshot / COW
2939 // would not be needed. In this case, skip the partition.
2940 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2941 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2942 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2943 CHECK(needs_snapshot == needs_cow);
2944
2945 if (!needs_snapshot) {
2946 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2947 << "because nothing needs to be snapshotted.";
2948 continue;
2949 }
2950
2951 // Find the original partition size.
2952 auto name = target_partition->name();
2953 auto old_partition_name =
2954 name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
2955 auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
2956 if (old_partition) {
2957 cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
2958 }
2959
2960 // Store these device sizes to snapshot status file.
2961 if (!CreateSnapshot(lock, cow_creator, &cow_creator_ret->snapshot_status)) {
2962 return Return::Error();
2963 }
2964 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2965
2966 // Create the COW partition. That is, use any remaining free space in super partition before
2967 // creating the COW images.
2968 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2969 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2970 << "cow_partition_size == "
2971 << cow_creator_ret->snapshot_status.cow_partition_size()
2972 << " is not a multiple of sector size " << kSectorSize;
2973 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2974 kCowGroupName, 0 /* flags */);
2975 if (cow_partition == nullptr) {
2976 return Return::Error();
2977 }
2978
2979 if (!target_metadata->ResizePartition(
2980 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2981 cow_creator_ret->cow_partition_usable_regions)) {
2982 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2983 << cow_creator_ret->snapshot_status.cow_partition_size();
2984 return Return::Error();
2985 }
2986 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2987 // error in the future.
2988 }
2989
2990 all_snapshot_status->emplace(target_partition->name(),
2991 std::move(cow_creator_ret->snapshot_status));
2992
2993 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2994 }
2995
2996 LOG(INFO) << "Allocating CoW images.";
2997
2998 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2999 // Create the backing COW image if necessary.
3000 if (snapshot_status.cow_file_size() > 0) {
3001 auto ret = CreateCowImage(lock, name);
3002 if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
3003 }
3004
3005 LOG(INFO) << "Successfully created snapshot for " << name;
3006 }
3007
3008 return Return::Ok();
3009 }
3010
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)3011 Return SnapshotManager::InitializeUpdateSnapshots(
3012 LockedFile* lock, MetadataBuilder* target_metadata,
3013 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
3014 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
3015 CHECK(lock);
3016
3017 CreateLogicalPartitionParams cow_params{
3018 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
3019 .metadata = exported_target_metadata,
3020 .timeout_ms = std::chrono::milliseconds::max(),
3021 .partition_opener = &device_->GetPartitionOpener(),
3022 };
3023 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
3024 AutoDeviceList created_devices_for_cow;
3025
3026 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
3027 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
3028 << target_partition->name();
3029 return Return::Error();
3030 }
3031
3032 auto it = all_snapshot_status.find(target_partition->name());
3033 if (it == all_snapshot_status.end()) continue;
3034 cow_params.partition_name = target_partition->name();
3035 std::string cow_name;
3036 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
3037 return Return::Error();
3038 }
3039
3040 std::string cow_path;
3041 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
3042 LOG(ERROR) << "Cannot determine path for " << cow_name;
3043 return Return::Error();
3044 }
3045
3046 if (it->second.compression_enabled()) {
3047 unique_fd fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3048 if (fd < 0) {
3049 PLOG(ERROR) << "open " << cow_path << " failed for snapshot "
3050 << cow_params.partition_name;
3051 return Return::Error();
3052 }
3053
3054 CowOptions options;
3055 if (device()->IsTestDevice()) {
3056 options.scratch_space = false;
3057 }
3058 options.compression = it->second.compression_algorithm();
3059
3060 CowWriter writer(options);
3061 if (!writer.Initialize(fd) || !writer.Finalize()) {
3062 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
3063 return Return::Error();
3064 }
3065 } else {
3066 auto ret = InitializeKernelCow(cow_path);
3067 if (!ret.is_ok()) {
3068 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
3069 << cow_path;
3070 return AddRequiredSpace(ret, all_snapshot_status);
3071 }
3072 }
3073 // Let destructor of created_devices_for_cow to unmap the COW devices.
3074 };
3075 return Return::Ok();
3076 }
3077
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)3078 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
3079 std::string* snapshot_path) {
3080 auto lock = LockShared();
3081 if (!lock) return false;
3082 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3083 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3084 << params.GetPartitionName();
3085 return false;
3086 }
3087
3088 SnapshotStatus status;
3089 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3090 return false;
3091 }
3092 if (status.compression_enabled()) {
3093 LOG(ERROR) << "Cannot use MapUpdateSnapshot with compressed snapshots";
3094 return false;
3095 }
3096
3097 SnapshotPaths paths;
3098 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3099 return false;
3100 }
3101
3102 if (!paths.snapshot_device.empty()) {
3103 *snapshot_path = paths.snapshot_device;
3104 } else {
3105 *snapshot_path = paths.target_device;
3106 }
3107 DCHECK(!snapshot_path->empty());
3108 return true;
3109 }
3110
OpenSnapshotWriter(const android::fs_mgr::CreateLogicalPartitionParams & params,const std::optional<std::string> & source_device)3111 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenSnapshotWriter(
3112 const android::fs_mgr::CreateLogicalPartitionParams& params,
3113 const std::optional<std::string>& source_device) {
3114 #if defined(LIBSNAPSHOT_NO_COW_WRITE)
3115 (void)params;
3116 (void)source_device;
3117
3118 LOG(ERROR) << "Snapshots cannot be written in first-stage init or recovery";
3119 return nullptr;
3120 #else
3121 // First unmap any existing mapping.
3122 auto lock = LockShared();
3123 if (!lock) return nullptr;
3124 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
3125 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
3126 << params.GetPartitionName();
3127 return nullptr;
3128 }
3129
3130 SnapshotPaths paths;
3131 if (!MapPartitionWithSnapshot(lock.get(), params, SnapshotContext::Update, &paths)) {
3132 return nullptr;
3133 }
3134
3135 SnapshotStatus status;
3136 if (!paths.cow_device_name.empty()) {
3137 if (!ReadSnapshotStatus(lock.get(), params.GetPartitionName(), &status)) {
3138 return nullptr;
3139 }
3140 } else {
3141 // Currently, partition_cow_creator always creates snapshots. The
3142 // reason is that if partition X shrinks while partition Y grows, we
3143 // cannot bindly write to the newly freed extents in X. This would
3144 // make the old slot unusable. So, the entire size of the target
3145 // partition is currently considered snapshottable.
3146 LOG(ERROR) << "No snapshot available for partition " << params.GetPartitionName();
3147 return nullptr;
3148 }
3149
3150 if (status.compression_enabled()) {
3151 return OpenCompressedSnapshotWriter(lock.get(), source_device, params.GetPartitionName(),
3152 status, paths);
3153 }
3154 return OpenKernelSnapshotWriter(lock.get(), source_device, params.GetPartitionName(), status,
3155 paths);
3156 #endif
3157 }
3158
3159 #if !defined(LIBSNAPSHOT_NO_COW_WRITE)
OpenCompressedSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3160 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenCompressedSnapshotWriter(
3161 LockedFile* lock, const std::optional<std::string>& source_device,
3162 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3163 const SnapshotPaths& paths) {
3164 CHECK(lock);
3165
3166 CowOptions cow_options;
3167 cow_options.compression = status.compression_algorithm();
3168 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3169 // Disable scratch space for vts tests
3170 if (device()->IsTestDevice()) {
3171 cow_options.scratch_space = false;
3172 }
3173
3174 // Currently we don't support partial snapshots, since partition_cow_creator
3175 // never creates this scenario.
3176 CHECK(status.snapshot_size() == status.device_size());
3177
3178 auto writer = std::make_unique<CompressedSnapshotWriter>(cow_options);
3179 if (source_device) {
3180 writer->SetSourceDevice(*source_device);
3181 }
3182
3183 std::string cow_path;
3184 if (!GetMappedImageDevicePath(paths.cow_device_name, &cow_path)) {
3185 LOG(ERROR) << "Could not determine path for " << paths.cow_device_name;
3186 return nullptr;
3187 }
3188
3189 unique_fd cow_fd(open(cow_path.c_str(), O_RDWR | O_CLOEXEC));
3190 if (cow_fd < 0) {
3191 PLOG(ERROR) << "OpenCompressedSnapshotWriter: open " << cow_path;
3192 return nullptr;
3193 }
3194 if (!writer->SetCowDevice(std::move(cow_fd))) {
3195 LOG(ERROR) << "Could not create COW writer from " << cow_path;
3196 return nullptr;
3197 }
3198
3199 return writer;
3200 }
3201
OpenKernelSnapshotWriter(LockedFile * lock,const std::optional<std::string> & source_device,const std::string & partition_name,const SnapshotStatus & status,const SnapshotPaths & paths)3202 std::unique_ptr<ISnapshotWriter> SnapshotManager::OpenKernelSnapshotWriter(
3203 LockedFile* lock, const std::optional<std::string>& source_device,
3204 [[maybe_unused]] const std::string& partition_name, const SnapshotStatus& status,
3205 const SnapshotPaths& paths) {
3206 CHECK(lock);
3207
3208 CowOptions cow_options;
3209 cow_options.max_blocks = {status.device_size() / cow_options.block_size};
3210
3211 auto writer = std::make_unique<OnlineKernelSnapshotWriter>(cow_options);
3212
3213 std::string path = paths.snapshot_device.empty() ? paths.target_device : paths.snapshot_device;
3214 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
3215 if (fd < 0) {
3216 PLOG(ERROR) << "open failed: " << path;
3217 return nullptr;
3218 }
3219
3220 if (source_device) {
3221 writer->SetSourceDevice(*source_device);
3222 }
3223
3224 uint64_t cow_size = status.cow_partition_size() + status.cow_file_size();
3225 writer->SetSnapshotDevice(std::move(fd), cow_size);
3226
3227 return writer;
3228 }
3229 #endif // !defined(LIBSNAPSHOT_NO_COW_WRITE)
3230
UnmapUpdateSnapshot(const std::string & target_partition_name)3231 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
3232 auto lock = LockShared();
3233 if (!lock) return false;
3234 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
3235 }
3236
UnmapAllPartitionsInRecovery()3237 bool SnapshotManager::UnmapAllPartitionsInRecovery() {
3238 auto lock = LockExclusive();
3239 if (!lock) return false;
3240
3241 const auto& opener = device_->GetPartitionOpener();
3242 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3243 auto super_device = device_->GetSuperDevice(slot);
3244 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
3245 if (!metadata) {
3246 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
3247 return false;
3248 }
3249
3250 bool ok = true;
3251 for (const auto& partition : metadata->partitions) {
3252 auto partition_name = GetPartitionName(partition);
3253 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
3254 }
3255 return ok;
3256 }
3257
operator <<(std::ostream & os,SnapshotManager::Slot slot)3258 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
3259 switch (slot) {
3260 case SnapshotManager::Slot::Unknown:
3261 return os << "unknown";
3262 case SnapshotManager::Slot::Source:
3263 return os << "source";
3264 case SnapshotManager::Slot::Target:
3265 return os << "target";
3266 }
3267 }
3268
Dump(std::ostream & os)3269 bool SnapshotManager::Dump(std::ostream& os) {
3270 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
3271 // if it is racy.
3272 auto file = OpenLock(0 /* lock flag */);
3273 if (!file) return false;
3274
3275 std::stringstream ss;
3276
3277 auto update_status = ReadSnapshotUpdateStatus(file.get());
3278
3279 ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
3280 ss << "Compression: " << update_status.compression_enabled() << std::endl;
3281 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
3282 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
3283 ss << "Rollback indicator: "
3284 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3285 << std::endl;
3286 ss << "Forward merge indicator: "
3287 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
3288 << std::endl;
3289 ss << "Source build fingerprint: " << update_status.source_build_fingerprint() << std::endl;
3290
3291 bool ok = true;
3292 std::vector<std::string> snapshots;
3293 if (!ListSnapshots(file.get(), &snapshots)) {
3294 LOG(ERROR) << "Could not list snapshots";
3295 snapshots.clear();
3296 ok = false;
3297 }
3298 for (const auto& name : snapshots) {
3299 ss << "Snapshot: " << name << std::endl;
3300 SnapshotStatus status;
3301 if (!ReadSnapshotStatus(file.get(), name, &status)) {
3302 ok = false;
3303 continue;
3304 }
3305 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
3306 ss << " device size (bytes): " << status.device_size() << std::endl;
3307 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
3308 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
3309 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
3310 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
3311 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
3312 ss << " compression: " << status.compression_algorithm() << std::endl;
3313 }
3314 os << ss.rdbuf();
3315 return ok;
3316 }
3317
EnsureMetadataMounted()3318 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
3319 if (!device_->IsRecovery()) {
3320 // No need to mount anything in recovery.
3321 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
3322 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
3323 }
3324 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
3325 if (ret == nullptr) return nullptr;
3326
3327 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
3328 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
3329 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
3330 // treat this case as if /metadata is not mounted.
3331 if (!LockShared()) {
3332 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
3333 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
3334 return nullptr;
3335 }
3336 return ret;
3337 }
3338
HandleImminentDataWipe(const std::function<void ()> & callback)3339 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
3340 if (!device_->IsRecovery()) {
3341 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3342 return false;
3343 }
3344
3345 auto mount = EnsureMetadataMounted();
3346 if (!mount || !mount->HasDevice()) {
3347 // We allow the wipe to continue, because if we can't mount /metadata,
3348 // it is unlikely the device would have booted anyway. If there is no
3349 // metadata partition, then the device predates Virtual A/B.
3350 return true;
3351 }
3352
3353 // Check this early, so we don't accidentally start trying to populate
3354 // the state file in recovery. Note we don't call GetUpdateState since
3355 // we want errors in acquiring the lock to be propagated, instead of
3356 // returning UpdateState::None.
3357 auto state_file = GetStateFilePath();
3358 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3359 return true;
3360 }
3361
3362 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3363 auto super_path = device_->GetSuperDevice(slot_number);
3364 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3365 LOG(ERROR) << "Unable to map partitions to complete merge.";
3366 return false;
3367 }
3368
3369 auto process_callback = [&]() -> bool {
3370 if (callback) {
3371 callback();
3372 }
3373 return true;
3374 };
3375
3376 in_factory_data_reset_ = true;
3377 UpdateState state =
3378 ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
3379 in_factory_data_reset_ = false;
3380
3381 if (state == UpdateState::MergeFailed) {
3382 return false;
3383 }
3384
3385 // Nothing should be depending on partitions now, so unmap them all.
3386 if (!UnmapAllPartitionsInRecovery()) {
3387 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3388 }
3389
3390 if (state != UpdateState::None) {
3391 auto lock = LockExclusive();
3392 if (!lock) return false;
3393
3394 // Zap the update state so the bootloader doesn't think we're still
3395 // merging. It's okay if this fails, it's informative only at this
3396 // point.
3397 WriteUpdateState(lock.get(), UpdateState::None);
3398 }
3399 return true;
3400 }
3401
FinishMergeInRecovery()3402 bool SnapshotManager::FinishMergeInRecovery() {
3403 if (!device_->IsRecovery()) {
3404 LOG(ERROR) << "Data wipes are only allowed in recovery.";
3405 return false;
3406 }
3407
3408 auto mount = EnsureMetadataMounted();
3409 if (!mount || !mount->HasDevice()) {
3410 return false;
3411 }
3412
3413 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3414 auto super_path = device_->GetSuperDevice(slot_number);
3415 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3416 LOG(ERROR) << "Unable to map partitions to complete merge.";
3417 return false;
3418 }
3419
3420 UpdateState state = ProcessUpdateState();
3421 if (state != UpdateState::MergeCompleted) {
3422 LOG(ERROR) << "Merge returned unexpected status: " << state;
3423 return false;
3424 }
3425
3426 // Nothing should be depending on partitions now, so unmap them all.
3427 if (!UnmapAllPartitionsInRecovery()) {
3428 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
3429 }
3430 return true;
3431 }
3432
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)3433 UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
3434 const std::function<bool()>& callback) {
3435 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
3436 UpdateState state = ProcessUpdateState(callback);
3437 LOG(INFO) << "Update state in recovery: " << state;
3438 switch (state) {
3439 case UpdateState::MergeFailed:
3440 LOG(ERROR) << "Unrecoverable merge failure detected.";
3441 return state;
3442 case UpdateState::Unverified: {
3443 // If an OTA was just applied but has not yet started merging:
3444 //
3445 // - if forward merge is allowed, initiate merge and call
3446 // ProcessUpdateState again.
3447 //
3448 // - if forward merge is not allowed, we
3449 // have no choice but to revert slots, because the current slot will
3450 // immediately become unbootable. Rather than wait for the device
3451 // to reboot N times until a rollback, we proactively disable the
3452 // new slot instead.
3453 //
3454 // Since the rollback is inevitable, we don't treat a HAL failure
3455 // as an error here.
3456 auto slot = GetCurrentSlot();
3457 if (slot == Slot::Target) {
3458 if (allow_forward_merge &&
3459 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
3460 LOG(INFO) << "Forward merge allowed, initiating merge now.";
3461
3462 if (!InitiateMerge()) {
3463 LOG(ERROR) << "Failed to initiate merge on data wipe.";
3464 return UpdateState::MergeFailed;
3465 }
3466 return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
3467 }
3468
3469 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
3470 device_->SetSlotAsUnbootable(slot_number);
3471 } else {
3472 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
3473 }
3474 break;
3475 }
3476 case UpdateState::MergeNeedsReboot:
3477 // We shouldn't get here, because nothing is depending on
3478 // logical partitions.
3479 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
3480 break;
3481 default:
3482 break;
3483 }
3484 return state;
3485 }
3486
EnsureNoOverflowSnapshot(LockedFile * lock)3487 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
3488 CHECK(lock);
3489
3490 std::vector<std::string> snapshots;
3491 if (!ListSnapshots(lock, &snapshots)) {
3492 LOG(ERROR) << "Could not list snapshots.";
3493 return false;
3494 }
3495
3496 auto& dm = DeviceMapper::Instance();
3497 for (const auto& snapshot : snapshots) {
3498 SnapshotStatus status;
3499 if (!ReadSnapshotStatus(lock, snapshot, &status)) {
3500 return false;
3501 }
3502 if (status.compression_enabled()) {
3503 continue;
3504 }
3505
3506 std::vector<DeviceMapper::TargetInfo> targets;
3507 if (!dm.GetTableStatus(snapshot, &targets)) {
3508 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
3509 return false;
3510 }
3511 if (targets.size() != 1) {
3512 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
3513 << ", size = " << targets.size();
3514 return false;
3515 }
3516 if (targets[0].IsOverflowSnapshot()) {
3517 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
3518 << ", CoW device size computation is wrong!";
3519 return false;
3520 }
3521 }
3522
3523 return true;
3524 }
3525
RecoveryCreateSnapshotDevices()3526 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
3527 if (!device_->IsRecovery()) {
3528 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3529 return CreateResult::NOT_CREATED;
3530 }
3531
3532 auto mount = EnsureMetadataMounted();
3533 if (!mount || !mount->HasDevice()) {
3534 LOG(ERROR) << "Couldn't mount Metadata.";
3535 return CreateResult::NOT_CREATED;
3536 }
3537 return RecoveryCreateSnapshotDevices(mount);
3538 }
3539
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)3540 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
3541 const std::unique_ptr<AutoDevice>& metadata_device) {
3542 if (!device_->IsRecovery()) {
3543 LOG(ERROR) << __func__ << " is only allowed in recovery.";
3544 return CreateResult::NOT_CREATED;
3545 }
3546
3547 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
3548 LOG(ERROR) << "Metadata not mounted.";
3549 return CreateResult::NOT_CREATED;
3550 }
3551
3552 auto state_file = GetStateFilePath();
3553 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
3554 LOG(ERROR) << "Couldn't access state file.";
3555 return CreateResult::NOT_CREATED;
3556 }
3557
3558 if (!NeedSnapshotsInFirstStageMount()) {
3559 return CreateResult::NOT_CREATED;
3560 }
3561
3562 auto slot_suffix = device_->GetOtherSlotSuffix();
3563 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
3564 auto super_path = device_->GetSuperDevice(slot_number);
3565 if (!CreateLogicalAndSnapshotPartitions(super_path, 20s)) {
3566 LOG(ERROR) << "Unable to map partitions.";
3567 return CreateResult::ERROR;
3568 }
3569 return CreateResult::CREATED;
3570 }
3571
UpdateForwardMergeIndicator(bool wipe)3572 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
3573 auto path = GetForwardMergeIndicatorPath();
3574
3575 if (!wipe) {
3576 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
3577 return RemoveFileIfExists(path);
3578 }
3579
3580 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
3581
3582 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
3583 if (!android::base::WriteStringToFile("1", path)) {
3584 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
3585 return false;
3586 }
3587
3588 return true;
3589 }
3590
GetSnapshotMergeStatsInstance()3591 ISnapshotMergeStats* SnapshotManager::GetSnapshotMergeStatsInstance() {
3592 return SnapshotMergeStats::GetInstance(*this);
3593 }
3594
3595 // This is only to be used in recovery or normal Android (not first-stage init).
3596 // We don't guarantee dm paths are available in first-stage init, because ueventd
3597 // isn't running yet.
GetMappedImageDevicePath(const std::string & device_name,std::string * device_path)3598 bool SnapshotManager::GetMappedImageDevicePath(const std::string& device_name,
3599 std::string* device_path) {
3600 auto& dm = DeviceMapper::Instance();
3601
3602 // Try getting the device string if it is a device mapper device.
3603 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3604 return dm.GetDmDevicePathByName(device_name, device_path);
3605 }
3606
3607 // Otherwise, get path from IImageManager.
3608 return images_->GetMappedImageDevice(device_name, device_path);
3609 }
3610
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)3611 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
3612 std::string* device_string_or_mapped_path) {
3613 auto& dm = DeviceMapper::Instance();
3614 // Try getting the device string if it is a device mapper device.
3615 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
3616 return dm.GetDeviceString(device_name, device_string_or_mapped_path);
3617 }
3618
3619 // Otherwise, get path from IImageManager.
3620 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
3621 return false;
3622 }
3623
3624 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
3625 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
3626 << "may not be available in first stage init! ";
3627 return true;
3628 }
3629
WaitForDevice(const std::string & device,std::chrono::milliseconds timeout_ms)3630 bool SnapshotManager::WaitForDevice(const std::string& device,
3631 std::chrono::milliseconds timeout_ms) {
3632 if (!android::base::StartsWith(device, "/")) {
3633 return true;
3634 }
3635
3636 // In first-stage init, we rely on init setting a callback which can
3637 // regenerate uevents and populate /dev for us.
3638 if (uevent_regen_callback_) {
3639 if (!uevent_regen_callback_(device)) {
3640 LOG(ERROR) << "Failed to find device after regenerating uevents: " << device;
3641 return false;
3642 }
3643 return true;
3644 }
3645
3646 // Otherwise, the only kind of device we need to wait for is a dm-user
3647 // misc device. Normal calls to DeviceMapper::CreateDevice() guarantee
3648 // the path has been created.
3649 if (!android::base::StartsWith(device, "/dev/dm-user/")) {
3650 return true;
3651 }
3652
3653 if (timeout_ms.count() == 0) {
3654 LOG(ERROR) << "No timeout was specified to wait for device: " << device;
3655 return false;
3656 }
3657 if (!android::fs_mgr::WaitForFile(device, timeout_ms)) {
3658 LOG(ERROR) << "Timed out waiting for device to appear: " << device;
3659 return false;
3660 }
3661 return true;
3662 }
3663
IsSnapuserdRequired()3664 bool SnapshotManager::IsSnapuserdRequired() {
3665 auto lock = LockExclusive();
3666 if (!lock) return false;
3667
3668 auto status = ReadSnapshotUpdateStatus(lock.get());
3669 return status.state() != UpdateState::None && status.compression_enabled();
3670 }
3671
DetachSnapuserdForSelinux(std::vector<std::string> * snapuserd_argv)3672 bool SnapshotManager::DetachSnapuserdForSelinux(std::vector<std::string>* snapuserd_argv) {
3673 return PerformInitTransition(InitTransition::SELINUX_DETACH, snapuserd_argv);
3674 }
3675
PerformSecondStageInitTransition()3676 bool SnapshotManager::PerformSecondStageInitTransition() {
3677 return PerformInitTransition(InitTransition::SECOND_STAGE);
3678 }
3679
ReadOldPartitionMetadata(LockedFile * lock)3680 const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
3681 CHECK(lock);
3682
3683 if (!old_partition_metadata_) {
3684 auto path = GetOldPartitionMetadataPath();
3685 old_partition_metadata_ = android::fs_mgr::ReadFromImageFile(path);
3686 if (!old_partition_metadata_) {
3687 LOG(ERROR) << "Could not read old partition metadata from " << path;
3688 return nullptr;
3689 }
3690 }
3691 return old_partition_metadata_.get();
3692 }
3693
DecideMergePhase(const SnapshotStatus & status)3694 MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
3695 if (status.compression_enabled() && status.device_size() < status.old_partition_size()) {
3696 return MergePhase::FIRST_PHASE;
3697 }
3698 return MergePhase::SECOND_PHASE;
3699 }
3700
UpdateCowStats(ISnapshotMergeStats * stats)3701 void SnapshotManager::UpdateCowStats(ISnapshotMergeStats* stats) {
3702 auto lock = LockExclusive();
3703 if (!lock) return;
3704
3705 std::vector<std::string> snapshots;
3706 if (!ListSnapshots(lock.get(), &snapshots, GetSnapshotSlotSuffix())) {
3707 LOG(ERROR) << "Could not list snapshots";
3708 return;
3709 }
3710
3711 uint64_t cow_file_size = 0;
3712 uint64_t total_cow_size = 0;
3713 uint64_t estimated_cow_size = 0;
3714 for (const auto& snapshot : snapshots) {
3715 SnapshotStatus status;
3716 if (!ReadSnapshotStatus(lock.get(), snapshot, &status)) {
3717 return;
3718 }
3719
3720 cow_file_size += status.cow_file_size();
3721 total_cow_size += status.cow_file_size() + status.cow_partition_size();
3722 estimated_cow_size += status.estimated_cow_size();
3723 }
3724
3725 stats->set_cow_file_size(cow_file_size);
3726 stats->set_total_cow_size_bytes(total_cow_size);
3727 stats->set_estimated_cow_size_bytes(estimated_cow_size);
3728 }
3729
DeleteDeviceIfExists(const std::string & name,const std::chrono::milliseconds & timeout_ms)3730 bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
3731 const std::chrono::milliseconds& timeout_ms) {
3732 auto& dm = DeviceMapper::Instance();
3733 auto start = std::chrono::steady_clock::now();
3734 while (true) {
3735 if (dm.DeleteDeviceIfExists(name)) {
3736 return true;
3737 }
3738 auto now = std::chrono::steady_clock::now();
3739 auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
3740 if (elapsed >= timeout_ms) {
3741 break;
3742 }
3743 std::this_thread::sleep_for(400ms);
3744 }
3745
3746 // Try to diagnose why this failed. First get the actual device path.
3747 std::string full_path;
3748 if (!dm.GetDmDevicePathByName(name, &full_path)) {
3749 LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
3750 return false;
3751 }
3752
3753 // Check for child dm-devices.
3754 std::string block_name = android::base::Basename(full_path);
3755 std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
3756
3757 std::error_code ec;
3758 std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
3759 if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
3760 LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
3761 return false;
3762 }
3763
3764 // Check for mounted partitions.
3765 android::fs_mgr::Fstab fstab;
3766 android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
3767 for (const auto& entry : fstab) {
3768 if (android::base::Basename(entry.blk_device) == block_name) {
3769 LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
3770 return false;
3771 }
3772 }
3773
3774 // Check for detached mounted partitions.
3775 for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
3776 std::string fs_type = android::base::Basename(fs.path().c_str());
3777 if (!(fs_type == "ext4" || fs_type == "f2fs")) {
3778 continue;
3779 }
3780
3781 std::string path = fs.path().c_str() + "/"s + block_name;
3782 if (access(path.c_str(), F_OK) == 0) {
3783 LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
3784 << "; possibly open file descriptor or attached loop device.";
3785 return false;
3786 }
3787 }
3788
3789 LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
3790 << " still in use."
3791 << " Probably a file descriptor was leaked or held open, or a loop device is"
3792 << " attached.";
3793 return false;
3794 }
3795
ReadMergeFailureCode()3796 MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
3797 auto lock = LockExclusive();
3798 if (!lock) return MergeFailureCode::AcquireLock;
3799
3800 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3801 if (status.state() != UpdateState::MergeFailed) {
3802 return MergeFailureCode::Ok;
3803 }
3804 return status.merge_failure_code();
3805 }
3806
ReadSourceBuildFingerprint()3807 std::string SnapshotManager::ReadSourceBuildFingerprint() {
3808 auto lock = LockExclusive();
3809 if (!lock) return {};
3810
3811 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
3812 return status.source_build_fingerprint();
3813 }
3814
3815 } // namespace snapshot
3816 } // namespace android
3817