1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libsnapshot/snapshot.h>
16
17 #include <dirent.h>
18 #include <math.h>
19 #include <sys/file.h>
20 #include <sys/types.h>
21 #include <sys/unistd.h>
22
23 #include <optional>
24 #include <thread>
25 #include <unordered_set>
26
27 #include <android-base/file.h>
28 #include <android-base/logging.h>
29 #include <android-base/parseint.h>
30 #include <android-base/strings.h>
31 #include <android-base/unique_fd.h>
32 #include <ext4_utils/ext4_utils.h>
33 #include <fs_mgr.h>
34 #include <fs_mgr_dm_linear.h>
35 #include <fstab/fstab.h>
36 #include <libdm/dm.h>
37 #include <libfiemap/image_manager.h>
38 #include <liblp/liblp.h>
39
40 #include <android/snapshot/snapshot.pb.h>
41 #include <libsnapshot/snapshot_stats.h>
42 #include "device_info.h"
43 #include "partition_cow_creator.h"
44 #include "snapshot_metadata_updater.h"
45 #include "utility.h"
46
47 namespace android {
48 namespace snapshot {
49
50 using android::base::unique_fd;
51 using android::dm::DeviceMapper;
52 using android::dm::DmDeviceState;
53 using android::dm::DmTable;
54 using android::dm::DmTargetLinear;
55 using android::dm::DmTargetSnapshot;
56 using android::dm::kSectorSize;
57 using android::dm::SnapshotStorageMode;
58 using android::fiemap::FiemapStatus;
59 using android::fiemap::IImageManager;
60 using android::fs_mgr::CreateDmTable;
61 using android::fs_mgr::CreateLogicalPartition;
62 using android::fs_mgr::CreateLogicalPartitionParams;
63 using android::fs_mgr::GetPartitionGroupName;
64 using android::fs_mgr::GetPartitionName;
65 using android::fs_mgr::LpMetadata;
66 using android::fs_mgr::MetadataBuilder;
67 using android::fs_mgr::SlotNumberForSlotSuffix;
68 using android::hardware::boot::V1_1::MergeStatus;
69 using chromeos_update_engine::DeltaArchiveManifest;
70 using chromeos_update_engine::Extent;
71 using chromeos_update_engine::InstallOperation;
72 template <typename T>
73 using RepeatedPtrField = google::protobuf::RepeatedPtrField<T>;
74 using std::chrono::duration_cast;
75 using namespace std::chrono_literals;
76 using namespace std::string_literals;
77
// Paths of the snapshot-boot and rollback indicator files under /metadata/ota.
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
// How long ProcessUpdateState() sleeps between two consecutive merge-state polls.
static constexpr auto kUpdateStateCheckInterval = std::chrono::seconds{2};
81
82 // Note: IImageManager is an incomplete type in the header, so the default
83 // destructor doesn't work.
~SnapshotManager()84 SnapshotManager::~SnapshotManager() {}
85
New(IDeviceInfo * info)86 std::unique_ptr<SnapshotManager> SnapshotManager::New(IDeviceInfo* info) {
87 if (!info) {
88 info = new DeviceInfo();
89 }
90 return std::unique_ptr<SnapshotManager>(new SnapshotManager(info));
91 }
92
NewForFirstStageMount(IDeviceInfo * info)93 std::unique_ptr<SnapshotManager> SnapshotManager::NewForFirstStageMount(IDeviceInfo* info) {
94 auto sm = New(info);
95 if (!sm || !sm->ForceLocalImageManager()) {
96 return nullptr;
97 }
98 return sm;
99 }
100
SnapshotManager(IDeviceInfo * device)101 SnapshotManager::SnapshotManager(IDeviceInfo* device) : device_(device) {
102 gsid_dir_ = device_->GetGsidDir();
103 metadata_dir_ = device_->GetMetadataDir();
104 }
105
// Builds the COW name for |snapshot_name| by appending the "-cow" suffix.
static std::string GetCowName(const std::string& snapshot_name) {
    std::string cow_name(snapshot_name);
    cow_name.append("-cow");
    return cow_name;
}
109
// Builds the COW image device name for |snapshot_name| by appending the
// "-cow-img" suffix.
static std::string GetCowImageDeviceName(const std::string& snapshot_name) {
    std::string image_name(snapshot_name);
    image_name.append("-cow-img");
    return image_name;
}
113
// Builds the base device name for |partition_name| by appending the "-base"
// suffix.
static std::string GetBaseDeviceName(const std::string& partition_name) {
    std::string base_name(partition_name);
    base_name.append("-base");
    return base_name;
}
117
// Builds the name of the inner snapshot device for |snapshot_name| by
// appending the "-inner" suffix.
static std::string GetSnapshotExtraDeviceName(const std::string& snapshot_name) {
    std::string inner_name(snapshot_name);
    inner_name.append("-inner");
    return inner_name;
}
121
BeginUpdate()122 bool SnapshotManager::BeginUpdate() {
123 bool needs_merge = false;
124 if (!TryCancelUpdate(&needs_merge)) {
125 return false;
126 }
127 if (needs_merge) {
128 LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
129 auto state = ProcessUpdateState();
130 LOG(INFO) << "Merged with state = " << state;
131 }
132
133 auto file = LockExclusive();
134 if (!file) return false;
135
136 // Purge the ImageManager just in case there is a corrupt lp_metadata file
137 // lying around. (NB: no need to return false on an error, we can let the
138 // update try to progress.)
139 if (EnsureImageManager()) {
140 images_->RemoveAllImages();
141 }
142
143 auto state = ReadUpdateState(file.get());
144 if (state != UpdateState::None) {
145 LOG(ERROR) << "An update is already in progress, cannot begin a new update";
146 return false;
147 }
148 return WriteUpdateState(file.get(), UpdateState::Initiated);
149 }
150
CancelUpdate()151 bool SnapshotManager::CancelUpdate() {
152 bool needs_merge = false;
153 if (!TryCancelUpdate(&needs_merge)) {
154 return false;
155 }
156 if (needs_merge) {
157 LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
158 }
159 return !needs_merge;
160 }
161
TryCancelUpdate(bool * needs_merge)162 bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
163 *needs_merge = false;
164
165 auto file = LockExclusive();
166 if (!file) return false;
167
168 UpdateState state = ReadUpdateState(file.get());
169 if (state == UpdateState::None) return true;
170
171 if (state == UpdateState::Initiated) {
172 LOG(INFO) << "Update has been initiated, now canceling";
173 return RemoveAllUpdateState(file.get());
174 }
175
176 if (state == UpdateState::Unverified) {
177 // We completed an update, but it can still be canceled if we haven't booted into it.
178 auto slot = GetCurrentSlot();
179 if (slot != Slot::Target) {
180 LOG(INFO) << "Canceling previously completed updates (if any)";
181 return RemoveAllUpdateState(file.get());
182 }
183 }
184 *needs_merge = true;
185 return true;
186 }
187
ReadUpdateSourceSlotSuffix()188 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
189 auto boot_file = GetSnapshotBootIndicatorPath();
190 std::string contents;
191 if (!android::base::ReadFileToString(boot_file, &contents)) {
192 PLOG(WARNING) << "Cannot read " << boot_file;
193 return {};
194 }
195 return contents;
196 }
197
GetCurrentSlot()198 SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
199 auto contents = ReadUpdateSourceSlotSuffix();
200 if (contents.empty()) {
201 return Slot::Unknown;
202 }
203 if (device_->GetSlotSuffix() == contents) {
204 return Slot::Source;
205 }
206 return Slot::Target;
207 }
208
RemoveFileIfExists(const std::string & path)209 static bool RemoveFileIfExists(const std::string& path) {
210 std::string message;
211 if (!android::base::RemoveFileIfExists(path, &message)) {
212 LOG(ERROR) << "Remove failed: " << path << ": " << message;
213 return false;
214 }
215 return true;
216 }
217
RemoveAllUpdateState(LockedFile * lock,const std::function<bool ()> & prolog)218 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function<bool()>& prolog) {
219 if (prolog && !prolog()) {
220 LOG(WARNING) << "Can't RemoveAllUpdateState: prolog failed.";
221 return false;
222 }
223
224 LOG(INFO) << "Removing all update state.";
225
226 if (!RemoveAllSnapshots(lock)) {
227 LOG(ERROR) << "Could not remove all snapshots";
228 return false;
229 }
230
231 // It's okay if these fail:
232 // - For SnapshotBoot and Rollback, first-stage init performs a deeper check after
233 // reading the indicator file, so it's not a problem if it still exists
234 // after the update completes.
235 // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
236 // matches the incoming update.
237 std::vector<std::string> files = {
238 GetSnapshotBootIndicatorPath(),
239 GetRollbackIndicatorPath(),
240 GetForwardMergeIndicatorPath(),
241 };
242 for (const auto& file : files) {
243 RemoveFileIfExists(file);
244 }
245
246 // If this fails, we'll keep trying to remove the update state (as the
247 // device reboots or starts a new update) until it finally succeeds.
248 return WriteUpdateState(lock, UpdateState::None);
249 }
250
FinishedSnapshotWrites(bool wipe)251 bool SnapshotManager::FinishedSnapshotWrites(bool wipe) {
252 auto lock = LockExclusive();
253 if (!lock) return false;
254
255 auto update_state = ReadUpdateState(lock.get());
256 if (update_state == UpdateState::Unverified) {
257 LOG(INFO) << "FinishedSnapshotWrites already called before. Ignored.";
258 return true;
259 }
260
261 if (update_state != UpdateState::Initiated) {
262 LOG(ERROR) << "Can only transition to the Unverified state from the Initiated state.";
263 return false;
264 }
265
266 if (!EnsureNoOverflowSnapshot(lock.get())) {
267 LOG(ERROR) << "Cannot ensure there are no overflow snapshots.";
268 return false;
269 }
270
271 if (!UpdateForwardMergeIndicator(wipe)) {
272 return false;
273 }
274
275 // This file is written on boot to detect whether a rollback occurred. It
276 // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
277 // snapshots too early.
278 if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
279 return false;
280 }
281
282 // This file acts as both a quick indicator for init (it can use access(2)
283 // to decide how to do first-stage mounts), and it stores the old slot, so
284 // we can tell whether or not we performed a rollback.
285 auto contents = device_->GetSlotSuffix();
286 auto boot_file = GetSnapshotBootIndicatorPath();
287 if (!WriteStringToFileAtomic(contents, boot_file)) {
288 PLOG(ERROR) << "write failed: " << boot_file;
289 return false;
290 }
291 return WriteUpdateState(lock.get(), UpdateState::Unverified);
292 }
293
CreateSnapshot(LockedFile * lock,SnapshotStatus * status)294 bool SnapshotManager::CreateSnapshot(LockedFile* lock, SnapshotStatus* status) {
295 CHECK(lock);
296 CHECK(lock->lock_mode() == LOCK_EX);
297 CHECK(status);
298
299 if (status->name().empty()) {
300 LOG(ERROR) << "SnapshotStatus has no name.";
301 return false;
302 }
303 // Sanity check these sizes. Like liblp, we guarantee the partition size
304 // is respected, which means it has to be sector-aligned. (This guarantee
305 // is useful for locating avb footers correctly). The COW file size, however,
306 // can be arbitrarily larger than specified, so we can safely round it up.
307 if (status->device_size() % kSectorSize != 0) {
308 LOG(ERROR) << "Snapshot " << status->name()
309 << " device size is not a multiple of the sector size: "
310 << status->device_size();
311 return false;
312 }
313 if (status->snapshot_size() % kSectorSize != 0) {
314 LOG(ERROR) << "Snapshot " << status->name()
315 << " snapshot size is not a multiple of the sector size: "
316 << status->snapshot_size();
317 return false;
318 }
319 if (status->cow_partition_size() % kSectorSize != 0) {
320 LOG(ERROR) << "Snapshot " << status->name()
321 << " cow partition size is not a multiple of the sector size: "
322 << status->cow_partition_size();
323 return false;
324 }
325 if (status->cow_file_size() % kSectorSize != 0) {
326 LOG(ERROR) << "Snapshot " << status->name()
327 << " cow file size is not a multiple of the sector size: "
328 << status->cow_file_size();
329 return false;
330 }
331
332 status->set_state(SnapshotState::CREATED);
333 status->set_sectors_allocated(0);
334 status->set_metadata_sectors(0);
335
336 if (!WriteSnapshotStatus(lock, *status)) {
337 PLOG(ERROR) << "Could not write snapshot status: " << status->name();
338 return false;
339 }
340 return true;
341 }
342
CreateCowImage(LockedFile * lock,const std::string & name)343 Return SnapshotManager::CreateCowImage(LockedFile* lock, const std::string& name) {
344 CHECK(lock);
345 CHECK(lock->lock_mode() == LOCK_EX);
346 if (!EnsureImageManager()) return Return::Error();
347
348 SnapshotStatus status;
349 if (!ReadSnapshotStatus(lock, name, &status)) {
350 return Return::Error();
351 }
352
353 // The COW file size should have been rounded up to the nearest sector in CreateSnapshot.
354 // Sanity check this.
355 if (status.cow_file_size() % kSectorSize != 0) {
356 LOG(ERROR) << "Snapshot " << name << " COW file size is not a multiple of the sector size: "
357 << status.cow_file_size();
358 return Return::Error();
359 }
360
361 std::string cow_image_name = GetCowImageDeviceName(name);
362 int cow_flags = IImageManager::CREATE_IMAGE_DEFAULT;
363 return Return(images_->CreateBackingImage(cow_image_name, status.cow_file_size(), cow_flags));
364 }
365
MapSnapshot(LockedFile * lock,const std::string & name,const std::string & base_device,const std::string & cow_device,const std::chrono::milliseconds & timeout_ms,std::string * dev_path)366 bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
367 const std::string& base_device, const std::string& cow_device,
368 const std::chrono::milliseconds& timeout_ms,
369 std::string* dev_path) {
370 CHECK(lock);
371
372 SnapshotStatus status;
373 if (!ReadSnapshotStatus(lock, name, &status)) {
374 return false;
375 }
376 if (status.state() == SnapshotState::NONE || status.state() == SnapshotState::MERGE_COMPLETED) {
377 LOG(ERROR) << "Should not create a snapshot device for " << name
378 << " after merging has completed.";
379 return false;
380 }
381
382 // Validate the block device size, as well as the requested snapshot size.
383 // Note that during first-stage init, we don't have the device paths.
384 if (android::base::StartsWith(base_device, "/")) {
385 unique_fd fd(open(base_device.c_str(), O_RDONLY | O_CLOEXEC));
386 if (fd < 0) {
387 PLOG(ERROR) << "open failed: " << base_device;
388 return false;
389 }
390 auto dev_size = get_block_device_size(fd);
391 if (!dev_size) {
392 PLOG(ERROR) << "Could not determine block device size: " << base_device;
393 return false;
394 }
395 if (status.device_size() != dev_size) {
396 LOG(ERROR) << "Block device size for " << base_device << " does not match"
397 << "(expected " << status.device_size() << ", got " << dev_size << ")";
398 return false;
399 }
400 }
401 if (status.device_size() % kSectorSize != 0) {
402 LOG(ERROR) << "invalid blockdev size for " << base_device << ": " << status.device_size();
403 return false;
404 }
405 if (status.snapshot_size() % kSectorSize != 0 ||
406 status.snapshot_size() > status.device_size()) {
407 LOG(ERROR) << "Invalid snapshot size for " << base_device << ": " << status.snapshot_size();
408 return false;
409 }
410 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
411 uint64_t linear_sectors = (status.device_size() - status.snapshot_size()) / kSectorSize;
412
413 auto& dm = DeviceMapper::Instance();
414
415 // Note that merging is a global state. We do track whether individual devices
416 // have completed merging, but the start of the merge process is considered
417 // atomic.
418 SnapshotStorageMode mode;
419 switch (ReadUpdateState(lock)) {
420 case UpdateState::MergeCompleted:
421 case UpdateState::MergeNeedsReboot:
422 LOG(ERROR) << "Should not create a snapshot device for " << name
423 << " after global merging has completed.";
424 return false;
425 case UpdateState::Merging:
426 case UpdateState::MergeFailed:
427 // Note: MergeFailed indicates that a merge is in progress, but
428 // is possibly stalled. We still have to honor the merge.
429 mode = SnapshotStorageMode::Merge;
430 break;
431 default:
432 mode = SnapshotStorageMode::Persistent;
433 break;
434 }
435
436 // The kernel (tested on 4.19) crashes horribly if a device has both a snapshot
437 // and a linear target in the same table. Instead, we stack them, and give the
438 // snapshot device a different name. It is not exposed to the caller in this
439 // case.
440 auto snap_name = (linear_sectors > 0) ? GetSnapshotExtraDeviceName(name) : name;
441
442 DmTable table;
443 table.Emplace<DmTargetSnapshot>(0, snapshot_sectors, base_device, cow_device, mode,
444 kSnapshotChunkSize);
445 if (!dm.CreateDevice(snap_name, table, dev_path, timeout_ms)) {
446 LOG(ERROR) << "Could not create snapshot device: " << snap_name;
447 return false;
448 }
449
450 if (linear_sectors) {
451 std::string snap_dev;
452 if (!dm.GetDeviceString(snap_name, &snap_dev)) {
453 LOG(ERROR) << "Cannot determine major/minor for: " << snap_name;
454 return false;
455 }
456
457 // Our stacking will looks like this:
458 // [linear, linear] ; to snapshot, and non-snapshot region of base device
459 // [snapshot-inner]
460 // [base device] [cow]
461 DmTable table;
462 table.Emplace<DmTargetLinear>(0, snapshot_sectors, snap_dev, 0);
463 table.Emplace<DmTargetLinear>(snapshot_sectors, linear_sectors, base_device,
464 snapshot_sectors);
465 if (!dm.CreateDevice(name, table, dev_path, timeout_ms)) {
466 LOG(ERROR) << "Could not create outer snapshot device: " << name;
467 dm.DeleteDevice(snap_name);
468 return false;
469 }
470 }
471
472 // :TODO: when merging is implemented, we need to add an argument to the
473 // status indicating how much progress is left to merge. (device-mapper
474 // does not retain the initial values, so we can't derive them.)
475 return true;
476 }
477
MapCowImage(const std::string & name,const std::chrono::milliseconds & timeout_ms)478 std::optional<std::string> SnapshotManager::MapCowImage(
479 const std::string& name, const std::chrono::milliseconds& timeout_ms) {
480 if (!EnsureImageManager()) return std::nullopt;
481 auto cow_image_name = GetCowImageDeviceName(name);
482
483 bool ok;
484 std::string cow_dev;
485 if (has_local_image_manager_) {
486 // If we forced a local image manager, it means we don't have binder,
487 // which means first-stage init. We must use device-mapper.
488 const auto& opener = device_->GetPartitionOpener();
489 ok = images_->MapImageWithDeviceMapper(opener, cow_image_name, &cow_dev);
490 } else {
491 ok = images_->MapImageDevice(cow_image_name, timeout_ms, &cow_dev);
492 }
493
494 if (ok) {
495 LOG(INFO) << "Mapped " << cow_image_name << " to " << cow_dev;
496 return cow_dev;
497 }
498 LOG(ERROR) << "Could not map image device: " << cow_image_name;
499 return std::nullopt;
500 }
501
UnmapSnapshot(LockedFile * lock,const std::string & name)502 bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
503 CHECK(lock);
504
505 auto& dm = DeviceMapper::Instance();
506 if (!dm.DeleteDeviceIfExists(name)) {
507 LOG(ERROR) << "Could not delete snapshot device: " << name;
508 return false;
509 }
510
511 auto snapshot_extra_device = GetSnapshotExtraDeviceName(name);
512 if (!dm.DeleteDeviceIfExists(snapshot_extra_device)) {
513 LOG(ERROR) << "Could not delete snapshot inner device: " << snapshot_extra_device;
514 return false;
515 }
516
517 return true;
518 }
519
UnmapCowImage(const std::string & name)520 bool SnapshotManager::UnmapCowImage(const std::string& name) {
521 if (!EnsureImageManager()) return false;
522 return images_->UnmapImageIfExists(GetCowImageDeviceName(name));
523 }
524
DeleteSnapshot(LockedFile * lock,const std::string & name)525 bool SnapshotManager::DeleteSnapshot(LockedFile* lock, const std::string& name) {
526 CHECK(lock);
527 CHECK(lock->lock_mode() == LOCK_EX);
528 if (!EnsureImageManager()) return false;
529
530 if (!UnmapCowDevices(lock, name)) {
531 return false;
532 }
533
534 // We can't delete snapshots in recovery. The only way we'd try is it we're
535 // completing or canceling a merge in preparation for a data wipe, in which
536 // case, we don't care if the file sticks around.
537 if (device_->IsRecovery()) {
538 LOG(INFO) << "Skipping delete of snapshot " << name << " in recovery.";
539 return true;
540 }
541
542 auto cow_image_name = GetCowImageDeviceName(name);
543 if (images_->BackingImageExists(cow_image_name)) {
544 if (!images_->DeleteBackingImage(cow_image_name)) {
545 return false;
546 }
547 }
548
549 std::string error;
550 auto file_path = GetSnapshotStatusFilePath(name);
551 if (!android::base::RemoveFileIfExists(file_path, &error)) {
552 LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
553 return false;
554 }
555 return true;
556 }
557
InitiateMerge(uint64_t * cow_file_size)558 bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
559 auto lock = LockExclusive();
560 if (!lock) return false;
561
562 UpdateState state = ReadUpdateState(lock.get());
563 if (state != UpdateState::Unverified) {
564 LOG(ERROR) << "Cannot begin a merge if an update has not been verified";
565 return false;
566 }
567
568 auto slot = GetCurrentSlot();
569 if (slot != Slot::Target) {
570 LOG(ERROR) << "Device cannot merge while not booting from new slot";
571 return false;
572 }
573
574 std::vector<std::string> snapshots;
575 if (!ListSnapshots(lock.get(), &snapshots)) {
576 LOG(ERROR) << "Could not list snapshots";
577 return false;
578 }
579
580 auto other_suffix = device_->GetOtherSlotSuffix();
581
582 auto& dm = DeviceMapper::Instance();
583 for (const auto& snapshot : snapshots) {
584 if (android::base::EndsWith(snapshot, other_suffix)) {
585 // Allow the merge to continue, but log this unexpected case.
586 LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
587 continue;
588 }
589
590 // The device has to be mapped, since everything should be merged at
591 // the same time. This is a fairly serious error. We could forcefully
592 // map everything here, but it should have been mapped during first-
593 // stage init.
594 if (dm.GetState(snapshot) == DmDeviceState::INVALID) {
595 LOG(ERROR) << "Cannot begin merge; device " << snapshot << " is not mapped.";
596 return false;
597 }
598 }
599
600 auto metadata = ReadCurrentMetadata();
601 for (auto it = snapshots.begin(); it != snapshots.end();) {
602 switch (GetMetadataPartitionState(*metadata, *it)) {
603 case MetadataPartitionState::Flashed:
604 LOG(WARNING) << "Detected re-flashing for partition " << *it
605 << ". Skip merging it.";
606 [[fallthrough]];
607 case MetadataPartitionState::None: {
608 LOG(WARNING) << "Deleting snapshot for partition " << *it;
609 if (!DeleteSnapshot(lock.get(), *it)) {
610 LOG(WARNING) << "Cannot delete snapshot for partition " << *it
611 << ". Skip merging it anyways.";
612 }
613 it = snapshots.erase(it);
614 } break;
615 case MetadataPartitionState::Updated: {
616 ++it;
617 } break;
618 }
619 }
620
621 uint64_t total_cow_file_size = 0;
622 DmTargetSnapshot::Status initial_target_values = {};
623 for (const auto& snapshot : snapshots) {
624 DmTargetSnapshot::Status current_status;
625 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) {
626 return false;
627 }
628 initial_target_values.sectors_allocated += current_status.sectors_allocated;
629 initial_target_values.total_sectors += current_status.total_sectors;
630 initial_target_values.metadata_sectors += current_status.metadata_sectors;
631
632 SnapshotStatus snapshot_status;
633 if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
634 return false;
635 }
636 total_cow_file_size += snapshot_status.cow_file_size();
637 }
638
639 if (cow_file_size) {
640 *cow_file_size = total_cow_file_size;
641 }
642
643 SnapshotUpdateStatus initial_status;
644 initial_status.set_state(UpdateState::Merging);
645 initial_status.set_sectors_allocated(initial_target_values.sectors_allocated);
646 initial_status.set_total_sectors(initial_target_values.total_sectors);
647 initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
648
649 // Point of no return - mark that we're starting a merge. From now on every
650 // snapshot must be a merge target.
651 if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
652 return false;
653 }
654
655 bool rewrote_all = true;
656 for (const auto& snapshot : snapshots) {
657 // If this fails, we have no choice but to continue. Everything must
658 // be merged. This is not an ideal state to be in, but it is safe,
659 // because we the next boot will try again.
660 if (!SwitchSnapshotToMerge(lock.get(), snapshot)) {
661 LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
662 rewrote_all = false;
663 }
664 }
665
666 // If we couldn't switch everything to a merge target, pre-emptively mark
667 // this merge as failed. It will get acknowledged when WaitForMerge() is
668 // called.
669 if (!rewrote_all) {
670 WriteUpdateState(lock.get(), UpdateState::MergeFailed);
671 }
672
673 // Return true no matter what, because a merge was initiated.
674 return true;
675 }
676
SwitchSnapshotToMerge(LockedFile * lock,const std::string & name)677 bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
678 SnapshotStatus status;
679 if (!ReadSnapshotStatus(lock, name, &status)) {
680 return false;
681 }
682 if (status.state() != SnapshotState::CREATED) {
683 LOG(WARNING) << "Snapshot " << name
684 << " has unexpected state: " << SnapshotState_Name(status.state());
685 }
686
687 // After this, we return true because we technically did switch to a merge
688 // target. Everything else we do here is just informational.
689 auto dm_name = GetSnapshotDeviceName(name, status);
690 if (!RewriteSnapshotDeviceTable(dm_name)) {
691 return false;
692 }
693
694 status.set_state(SnapshotState::MERGING);
695
696 DmTargetSnapshot::Status dm_status;
697 if (!QuerySnapshotStatus(dm_name, nullptr, &dm_status)) {
698 LOG(ERROR) << "Could not query merge status for snapshot: " << dm_name;
699 }
700 status.set_sectors_allocated(dm_status.sectors_allocated);
701 status.set_metadata_sectors(dm_status.metadata_sectors);
702 if (!WriteSnapshotStatus(lock, status)) {
703 LOG(ERROR) << "Could not update status file for snapshot: " << name;
704 }
705 return true;
706 }
707
RewriteSnapshotDeviceTable(const std::string & dm_name)708 bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& dm_name) {
709 auto& dm = DeviceMapper::Instance();
710
711 std::vector<DeviceMapper::TargetInfo> old_targets;
712 if (!dm.GetTableInfo(dm_name, &old_targets)) {
713 LOG(ERROR) << "Could not read snapshot device table: " << dm_name;
714 return false;
715 }
716 if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
717 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << dm_name;
718 return false;
719 }
720
721 std::string base_device, cow_device;
722 if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
723 LOG(ERROR) << "Could not derive underlying devices for snapshot: " << dm_name;
724 return false;
725 }
726
727 DmTable table;
728 table.Emplace<DmTargetSnapshot>(0, old_targets[0].spec.length, base_device, cow_device,
729 SnapshotStorageMode::Merge, kSnapshotChunkSize);
730 if (!dm.LoadTableAndActivate(dm_name, table)) {
731 LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << dm_name;
732 return false;
733 }
734 LOG(INFO) << "Successfully switched snapshot device to a merge target: " << dm_name;
735 return true;
736 }
737
// Selects which device-mapper query GetSingleTarget() performs.
enum class TableQuery {
    Table,   // Query via DeviceMapper::GetTableInfo().
    Status,  // Query via DeviceMapper::GetTableStatus().
};
742
GetSingleTarget(const std::string & dm_name,TableQuery query,DeviceMapper::TargetInfo * target)743 static bool GetSingleTarget(const std::string& dm_name, TableQuery query,
744 DeviceMapper::TargetInfo* target) {
745 auto& dm = DeviceMapper::Instance();
746 if (dm.GetState(dm_name) == DmDeviceState::INVALID) {
747 return false;
748 }
749
750 std::vector<DeviceMapper::TargetInfo> targets;
751 bool result;
752 if (query == TableQuery::Status) {
753 result = dm.GetTableStatus(dm_name, &targets);
754 } else {
755 result = dm.GetTableInfo(dm_name, &targets);
756 }
757 if (!result) {
758 LOG(ERROR) << "Could not query device: " << dm_name;
759 return false;
760 }
761 if (targets.size() != 1) {
762 return false;
763 }
764
765 *target = std::move(targets[0]);
766 return true;
767 }
768
IsSnapshotDevice(const std::string & dm_name,TargetInfo * target)769 bool SnapshotManager::IsSnapshotDevice(const std::string& dm_name, TargetInfo* target) {
770 DeviceMapper::TargetInfo snap_target;
771 if (!GetSingleTarget(dm_name, TableQuery::Status, &snap_target)) {
772 return false;
773 }
774 auto type = DeviceMapper::GetTargetType(snap_target.spec);
775 if (type != "snapshot" && type != "snapshot-merge") {
776 return false;
777 }
778 if (target) {
779 *target = std::move(snap_target);
780 }
781 return true;
782 }
783
QuerySnapshotStatus(const std::string & dm_name,std::string * target_type,DmTargetSnapshot::Status * status)784 bool SnapshotManager::QuerySnapshotStatus(const std::string& dm_name, std::string* target_type,
785 DmTargetSnapshot::Status* status) {
786 DeviceMapper::TargetInfo target;
787 if (!IsSnapshotDevice(dm_name, &target)) {
788 LOG(ERROR) << "Device " << dm_name << " is not a snapshot or snapshot-merge device";
789 return false;
790 }
791 if (!DmTargetSnapshot::ParseStatusText(target.data, status)) {
792 LOG(ERROR) << "Could not parse snapshot status text: " << dm_name;
793 return false;
794 }
795 if (target_type) {
796 *target_type = DeviceMapper::GetTargetType(target.spec);
797 }
798 return true;
799 }
800
801 // Note that when a merge fails, we will *always* try again to complete the
802 // merge each time the device boots. There is no harm in doing so, and if
803 // the problem was transient, we might manage to get a new outcome.
ProcessUpdateState(const std::function<bool ()> & callback,const std::function<bool ()> & before_cancel)804 UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
805 const std::function<bool()>& before_cancel) {
806 while (true) {
807 UpdateState state = CheckMergeState(before_cancel);
808 if (state == UpdateState::MergeFailed) {
809 AcknowledgeMergeFailure();
810 }
811 if (state != UpdateState::Merging) {
812 // Either there is no merge, or the merge was finished, so no need
813 // to keep waiting.
814 return state;
815 }
816
817 if (callback && !callback()) {
818 return state;
819 }
820
821 // This wait is not super time sensitive, so we have a relatively
822 // low polling frequency.
823 std::this_thread::sleep_for(kUpdateStateCheckInterval);
824 }
825 }
826
CheckMergeState(const std::function<bool ()> & before_cancel)827 UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) {
828 auto lock = LockExclusive();
829 if (!lock) {
830 return UpdateState::MergeFailed;
831 }
832
833 UpdateState state = CheckMergeState(lock.get(), before_cancel);
834 if (state == UpdateState::MergeCompleted) {
835 // Do this inside the same lock. Failures get acknowledged without the
836 // lock, because flock() might have failed.
837 AcknowledgeMergeSuccess(lock.get());
838 } else if (state == UpdateState::Cancelled) {
839 if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
840 return ReadSnapshotUpdateStatus(lock.get()).state();
841 }
842 }
843 return state;
844 }
845
CheckMergeState(LockedFile * lock,const std::function<bool ()> & before_cancel)846 UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
847 const std::function<bool()>& before_cancel) {
848 UpdateState state = ReadUpdateState(lock);
849 switch (state) {
850 case UpdateState::None:
851 case UpdateState::MergeCompleted:
852 // Harmless races are allowed between two callers of WaitForMerge,
853 // so in both of these cases we just propagate the state.
854 return state;
855
856 case UpdateState::Merging:
857 case UpdateState::MergeNeedsReboot:
858 case UpdateState::MergeFailed:
859 // We'll poll each snapshot below. Note that for the NeedsReboot
860 // case, we always poll once to give cleanup another opportunity to
861 // run.
862 break;
863
864 case UpdateState::Unverified:
865 // This is an edge case. Normally cancelled updates are detected
866 // via the merge poll below, but if we never started a merge, we
867 // need to also check here.
868 if (HandleCancelledUpdate(lock, before_cancel)) {
869 return UpdateState::Cancelled;
870 }
871 return state;
872
873 default:
874 return state;
875 }
876
877 std::vector<std::string> snapshots;
878 if (!ListSnapshots(lock, &snapshots)) {
879 return UpdateState::MergeFailed;
880 }
881
882 bool cancelled = false;
883 bool failed = false;
884 bool merging = false;
885 bool needs_reboot = false;
886 for (const auto& snapshot : snapshots) {
887 UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot);
888 switch (snapshot_state) {
889 case UpdateState::MergeFailed:
890 failed = true;
891 break;
892 case UpdateState::Merging:
893 merging = true;
894 break;
895 case UpdateState::MergeNeedsReboot:
896 needs_reboot = true;
897 break;
898 case UpdateState::MergeCompleted:
899 break;
900 case UpdateState::Cancelled:
901 cancelled = true;
902 break;
903 default:
904 LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
905 << "\"" << snapshot_state << "\"";
906 failed = true;
907 break;
908 }
909 }
910
911 if (merging) {
912 // Note that we handle "Merging" before we handle anything else. We
913 // want to poll until *nothing* is merging if we can, so everything has
914 // a chance to get marked as completed or failed.
915 return UpdateState::Merging;
916 }
917 if (failed) {
918 // Note: since there are many drop-out cases for failure, we acknowledge
919 // it in WaitForMerge rather than here and elsewhere.
920 return UpdateState::MergeFailed;
921 }
922 if (needs_reboot) {
923 WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
924 return UpdateState::MergeNeedsReboot;
925 }
926 if (cancelled) {
927 // This is an edge case, that we handle as correctly as we sensibly can.
928 // The underlying partition has changed behind update_engine, and we've
929 // removed the snapshot as a result. The exact state of the update is
930 // undefined now, but this can only happen on an unlocked device where
931 // partitions can be flashed without wiping userdata.
932 return UpdateState::Cancelled;
933 }
934 return UpdateState::MergeCompleted;
935 }
936
CheckTargetMergeState(LockedFile * lock,const std::string & name)937 UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) {
938 SnapshotStatus snapshot_status;
939 if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
940 return UpdateState::MergeFailed;
941 }
942
943 std::string dm_name = GetSnapshotDeviceName(name, snapshot_status);
944
945 std::unique_ptr<LpMetadata> current_metadata;
946
947 if (!IsSnapshotDevice(dm_name)) {
948 if (!current_metadata) {
949 current_metadata = ReadCurrentMetadata();
950 }
951
952 if (!current_metadata ||
953 GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
954 DeleteSnapshot(lock, name);
955 return UpdateState::Cancelled;
956 }
957
958 // During a check, we decided the merge was complete, but we were unable to
959 // collapse the device-mapper stack and perform COW cleanup. If we haven't
960 // rebooted after this check, the device will still be a snapshot-merge
961 // target. If the have rebooted, the device will now be a linear target,
962 // and we can try cleanup again.
963 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
964 // NB: It's okay if this fails now, we gave cleanup our best effort.
965 OnSnapshotMergeComplete(lock, name, snapshot_status);
966 return UpdateState::MergeCompleted;
967 }
968
969 LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << dm_name;
970 return UpdateState::MergeFailed;
971 }
972
973 // This check is expensive so it is only enabled for debugging.
974 DCHECK((current_metadata = ReadCurrentMetadata()) &&
975 GetMetadataPartitionState(*current_metadata, name) == MetadataPartitionState::Updated);
976
977 std::string target_type;
978 DmTargetSnapshot::Status status;
979 if (!QuerySnapshotStatus(dm_name, &target_type, &status)) {
980 return UpdateState::MergeFailed;
981 }
982 if (target_type != "snapshot-merge") {
983 // We can get here if we failed to rewrite the target type in
984 // InitiateMerge(). If we failed to create the target in first-stage
985 // init, boot would not succeed.
986 LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
987 return UpdateState::MergeFailed;
988 }
989
990 // These two values are equal when merging is complete.
991 if (status.sectors_allocated != status.metadata_sectors) {
992 if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
993 LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
994 return UpdateState::MergeFailed;
995 }
996 return UpdateState::Merging;
997 }
998
999 // Merging is done. First, update the status file to indicate the merge
1000 // is complete. We do this before calling OnSnapshotMergeComplete, even
1001 // though this means the write is potentially wasted work (since in the
1002 // ideal case we'll immediately delete the file).
1003 //
1004 // This makes it simpler to reason about the next reboot: no matter what
1005 // part of cleanup failed, first-stage init won't try to create another
1006 // snapshot device for this partition.
1007 snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
1008 if (!WriteSnapshotStatus(lock, snapshot_status)) {
1009 return UpdateState::MergeFailed;
1010 }
1011 if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
1012 return UpdateState::MergeNeedsReboot;
1013 }
1014 return UpdateState::MergeCompleted;
1015 }
1016
GetSnapshotBootIndicatorPath()1017 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
1018 return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
1019 }
1020
GetRollbackIndicatorPath()1021 std::string SnapshotManager::GetRollbackIndicatorPath() {
1022 return metadata_dir_ + "/" + android::base::Basename(kRollbackIndicatorPath);
1023 }
1024
GetForwardMergeIndicatorPath()1025 std::string SnapshotManager::GetForwardMergeIndicatorPath() {
1026 return metadata_dir_ + "/allow-forward-merge";
1027 }
1028
AcknowledgeMergeSuccess(LockedFile * lock)1029 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
1030 // It's not possible to remove update state in recovery, so write an
1031 // indicator that cleanup is needed on reboot. If a factory data reset
1032 // was requested, it doesn't matter, everything will get wiped anyway.
1033 // To make testing easier we consider a /data wipe as cleaned up.
1034 if (device_->IsRecovery() && !in_factory_data_reset_) {
1035 WriteUpdateState(lock, UpdateState::MergeCompleted);
1036 return;
1037 }
1038
1039 RemoveAllUpdateState(lock);
1040 }
1041
AcknowledgeMergeFailure()1042 void SnapshotManager::AcknowledgeMergeFailure() {
1043 // Log first, so worst case, we always have a record of why the calls below
1044 // were being made.
1045 LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
1046
1047 auto lock = LockExclusive();
1048 if (!lock) return;
1049
1050 // Since we released the lock in between WaitForMerge and here, it's
1051 // possible (1) the merge successfully completed or (2) was already
1052 // marked as a failure. So make sure to check the state again, and
1053 // only mark as a failure if appropriate.
1054 UpdateState state = ReadUpdateState(lock.get());
1055 if (state != UpdateState::Merging && state != UpdateState::MergeNeedsReboot) {
1056 return;
1057 }
1058
1059 WriteUpdateState(lock.get(), UpdateState::MergeFailed);
1060 }
1061
OnSnapshotMergeComplete(LockedFile * lock,const std::string & name,const SnapshotStatus & status)1062 bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
1063 const SnapshotStatus& status) {
1064 auto dm_name = GetSnapshotDeviceName(name, status);
1065 if (IsSnapshotDevice(dm_name)) {
1066 // We are extra-cautious here, to avoid deleting the wrong table.
1067 std::string target_type;
1068 DmTargetSnapshot::Status dm_status;
1069 if (!QuerySnapshotStatus(dm_name, &target_type, &dm_status)) {
1070 return false;
1071 }
1072 if (target_type != "snapshot-merge") {
1073 LOG(ERROR) << "Unexpected target type " << target_type
1074 << " for snapshot device: " << dm_name;
1075 return false;
1076 }
1077 if (dm_status.sectors_allocated != dm_status.metadata_sectors) {
1078 LOG(ERROR) << "Merge is unexpectedly incomplete for device " << dm_name;
1079 return false;
1080 }
1081 if (!CollapseSnapshotDevice(name, status)) {
1082 LOG(ERROR) << "Unable to collapse snapshot: " << name;
1083 return false;
1084 }
1085 // Note that collapsing is implicitly an Unmap, so we don't need to
1086 // unmap the snapshot.
1087 }
1088
1089 if (!DeleteSnapshot(lock, name)) {
1090 LOG(ERROR) << "Could not delete snapshot: " << name;
1091 return false;
1092 }
1093 return true;
1094 }
1095
CollapseSnapshotDevice(const std::string & name,const SnapshotStatus & status)1096 bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
1097 const SnapshotStatus& status) {
1098 auto& dm = DeviceMapper::Instance();
1099 auto dm_name = GetSnapshotDeviceName(name, status);
1100
1101 // Verify we have a snapshot-merge device.
1102 DeviceMapper::TargetInfo target;
1103 if (!GetSingleTarget(dm_name, TableQuery::Table, &target)) {
1104 return false;
1105 }
1106 if (DeviceMapper::GetTargetType(target.spec) != "snapshot-merge") {
1107 // This should be impossible, it was checked earlier.
1108 LOG(ERROR) << "Snapshot device has invalid target type: " << dm_name;
1109 return false;
1110 }
1111
1112 std::string base_device, cow_device;
1113 if (!DmTargetSnapshot::GetDevicesFromParams(target.data, &base_device, &cow_device)) {
1114 LOG(ERROR) << "Could not parse snapshot device " << dm_name
1115 << " parameters: " << target.data;
1116 return false;
1117 }
1118
1119 uint64_t snapshot_sectors = status.snapshot_size() / kSectorSize;
1120 if (snapshot_sectors * kSectorSize != status.snapshot_size()) {
1121 LOG(ERROR) << "Snapshot " << name
1122 << " size is not sector aligned: " << status.snapshot_size();
1123 return false;
1124 }
1125
1126 if (dm_name != name) {
1127 // We've derived the base device, but we actually need to replace the
1128 // table of the outermost device. Do a quick verification that this
1129 // device looks like we expect it to.
1130 std::vector<DeviceMapper::TargetInfo> outer_table;
1131 if (!dm.GetTableInfo(name, &outer_table)) {
1132 LOG(ERROR) << "Could not validate outer snapshot table: " << name;
1133 return false;
1134 }
1135 if (outer_table.size() != 2) {
1136 LOG(ERROR) << "Expected 2 dm-linear targets for table " << name
1137 << ", got: " << outer_table.size();
1138 return false;
1139 }
1140 for (const auto& target : outer_table) {
1141 auto target_type = DeviceMapper::GetTargetType(target.spec);
1142 if (target_type != "linear") {
1143 LOG(ERROR) << "Outer snapshot table may only contain linear targets, but " << name
1144 << " has target: " << target_type;
1145 return false;
1146 }
1147 }
1148 if (outer_table[0].spec.length != snapshot_sectors) {
1149 LOG(ERROR) << "dm-snapshot " << name << " should have " << snapshot_sectors
1150 << " sectors, got: " << outer_table[0].spec.length;
1151 return false;
1152 }
1153 uint64_t expected_device_sectors = status.device_size() / kSectorSize;
1154 uint64_t actual_device_sectors = outer_table[0].spec.length + outer_table[1].spec.length;
1155 if (expected_device_sectors != actual_device_sectors) {
1156 LOG(ERROR) << "Outer device " << name << " should have " << expected_device_sectors
1157 << " sectors, got: " << actual_device_sectors;
1158 return false;
1159 }
1160 }
1161
1162 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1163 // Create a DmTable that is identical to the base device.
1164 CreateLogicalPartitionParams base_device_params{
1165 .block_device = device_->GetSuperDevice(slot),
1166 .metadata_slot = slot,
1167 .partition_name = name,
1168 .partition_opener = &device_->GetPartitionOpener(),
1169 };
1170 DmTable table;
1171 if (!CreateDmTable(base_device_params, &table)) {
1172 LOG(ERROR) << "Could not create a DmTable for partition: " << name;
1173 return false;
1174 }
1175
1176 // Note: we are replacing the *outer* table here, so we do not use dm_name.
1177 if (!dm.LoadTableAndActivate(name, table)) {
1178 return false;
1179 }
1180
1181 // Attempt to delete the snapshot device if one still exists. Nothing
1182 // should be depending on the device, and device-mapper should have
1183 // flushed remaining I/O. We could in theory replace with dm-zero (or
1184 // re-use the table above), but for now it's better to know why this
1185 // would fail.
1186 if (dm_name != name && !dm.DeleteDeviceIfExists(dm_name)) {
1187 LOG(ERROR) << "Unable to delete snapshot device " << dm_name << ", COW cannot be "
1188 << "reclaimed until after reboot.";
1189 return false;
1190 }
1191
1192 // Cleanup the base device as well, since it is no longer used. This does
1193 // not block cleanup.
1194 auto base_name = GetBaseDeviceName(name);
1195 if (!dm.DeleteDeviceIfExists(base_name)) {
1196 LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
1197 }
1198 return true;
1199 }
1200
HandleCancelledUpdate(LockedFile * lock,const std::function<bool ()> & before_cancel)1201 bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock,
1202 const std::function<bool()>& before_cancel) {
1203 auto slot = GetCurrentSlot();
1204 if (slot == Slot::Unknown) {
1205 return false;
1206 }
1207
1208 // If all snapshots were reflashed, then cancel the entire update.
1209 if (AreAllSnapshotsCancelled(lock)) {
1210 LOG(WARNING) << "Detected re-flashing, cancelling unverified update.";
1211 return RemoveAllUpdateState(lock, before_cancel);
1212 }
1213
1214 // If update has been rolled back, then cancel the entire update.
1215 // Client (update_engine) is responsible for doing additional cleanup work on its own states
1216 // when ProcessUpdateState() returns UpdateState::Cancelled.
1217 auto current_slot = GetCurrentSlot();
1218 if (current_slot != Slot::Source) {
1219 LOG(INFO) << "Update state is being processed while booting at " << current_slot
1220 << " slot, taking no action.";
1221 return false;
1222 }
1223
1224 // current_slot == Source. Attempt to detect rollbacks.
1225 if (access(GetRollbackIndicatorPath().c_str(), F_OK) != 0) {
1226 // This unverified update is not attempted. Take no action.
1227 PLOG(INFO) << "Rollback indicator not detected. "
1228 << "Update state is being processed before reboot, taking no action.";
1229 return false;
1230 }
1231
1232 LOG(WARNING) << "Detected rollback, cancelling unverified update.";
1233 return RemoveAllUpdateState(lock, before_cancel);
1234 }
1235
ReadCurrentMetadata()1236 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
1237 const auto& opener = device_->GetPartitionOpener();
1238 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1239 auto super_device = device_->GetSuperDevice(slot);
1240 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1241 if (!metadata) {
1242 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1243 return nullptr;
1244 }
1245 return metadata;
1246 }
1247
GetMetadataPartitionState(const LpMetadata & metadata,const std::string & name)1248 SnapshotManager::MetadataPartitionState SnapshotManager::GetMetadataPartitionState(
1249 const LpMetadata& metadata, const std::string& name) {
1250 auto partition = android::fs_mgr::FindPartition(metadata, name);
1251 if (!partition) return MetadataPartitionState::None;
1252 if (partition->attributes & LP_PARTITION_ATTR_UPDATED) {
1253 return MetadataPartitionState::Updated;
1254 }
1255 return MetadataPartitionState::Flashed;
1256 }
1257
AreAllSnapshotsCancelled(LockedFile * lock)1258 bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
1259 std::vector<std::string> snapshots;
1260 if (!ListSnapshots(lock, &snapshots)) {
1261 LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
1262 << "after applying an update. Assuming no snapshots.";
1263 // Let HandleCancelledUpdate resets UpdateState.
1264 return true;
1265 }
1266
1267 std::map<std::string, bool> flashing_status;
1268
1269 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1270 LOG(WARNING) << "Failed to determine whether partitions have been flashed. Not"
1271 << "removing update states.";
1272 return false;
1273 }
1274
1275 bool all_snapshots_cancelled = std::all_of(flashing_status.begin(), flashing_status.end(),
1276 [](const auto& pair) { return pair.second; });
1277
1278 if (all_snapshots_cancelled) {
1279 LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
1280 }
1281 return all_snapshots_cancelled;
1282 }
1283
GetSnapshotFlashingStatus(LockedFile * lock,const std::vector<std::string> & snapshots,std::map<std::string,bool> * out)1284 bool SnapshotManager::GetSnapshotFlashingStatus(LockedFile* lock,
1285 const std::vector<std::string>& snapshots,
1286 std::map<std::string, bool>* out) {
1287 CHECK(lock);
1288
1289 auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
1290 if (source_slot_suffix.empty()) {
1291 return false;
1292 }
1293 uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
1294 uint32_t target_slot = (source_slot == 0) ? 1 : 0;
1295
1296 // Attempt to detect re-flashing on each partition.
1297 // - If all partitions are re-flashed, we can proceed to cancel the whole update.
1298 // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
1299 // deleted. Caller is responsible for merging the rest of the snapshots.
1300 // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
1301 //
1302 // Note that we use target slot metadata, since if an OTA has been applied
1303 // to the target slot, we can detect the UPDATED flag. Any kind of flash
1304 // operation against dynamic partitions ensures that all copies of the
1305 // metadata are in sync, so flashing all partitions on the source slot will
1306 // remove the UPDATED flag on the target slot as well.
1307 const auto& opener = device_->GetPartitionOpener();
1308 auto super_device = device_->GetSuperDevice(target_slot);
1309 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
1310 if (!metadata) {
1311 return false;
1312 }
1313
1314 for (const auto& snapshot_name : snapshots) {
1315 if (GetMetadataPartitionState(*metadata, snapshot_name) ==
1316 MetadataPartitionState::Updated) {
1317 out->emplace(snapshot_name, false);
1318 } else {
1319 // Delete snapshots for partitions that are re-flashed after the update.
1320 LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
1321 out->emplace(snapshot_name, true);
1322 }
1323 }
1324 return true;
1325 }
1326
RemoveAllSnapshots(LockedFile * lock)1327 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
1328 std::vector<std::string> snapshots;
1329 if (!ListSnapshots(lock, &snapshots)) {
1330 LOG(ERROR) << "Could not list snapshots";
1331 return false;
1332 }
1333
1334 std::map<std::string, bool> flashing_status;
1335 if (!GetSnapshotFlashingStatus(lock, snapshots, &flashing_status)) {
1336 LOG(WARNING) << "Failed to get flashing status";
1337 }
1338
1339 auto current_slot = GetCurrentSlot();
1340 bool ok = true;
1341 bool has_mapped_cow_images = false;
1342 for (const auto& name : snapshots) {
1343 // If booting off source slot, it is okay to unmap and delete all the snapshots.
1344 // If boot indicator is missing, update state is None or Initiated, so
1345 // it is also okay to unmap and delete all the snapshots.
1346 // If booting off target slot,
1347 // - should not unmap because:
1348 // - In Android mode, snapshots are not mapped, but
1349 // filesystems are mounting off dm-linear targets directly.
1350 // - In recovery mode, assume nothing is mapped, so it is optional to unmap.
1351 // - If partition is flashed or unknown, it is okay to delete snapshots.
1352 // Otherwise (UPDATED flag), only delete snapshots if they are not mapped
1353 // as dm-snapshot (for example, after merge completes).
1354 bool should_unmap = current_slot != Slot::Target;
1355 bool should_delete = ShouldDeleteSnapshot(lock, flashing_status, current_slot, name);
1356
1357 bool partition_ok = true;
1358 if (should_unmap && !UnmapPartitionWithSnapshot(lock, name)) {
1359 partition_ok = false;
1360 }
1361 if (partition_ok && should_delete && !DeleteSnapshot(lock, name)) {
1362 partition_ok = false;
1363 }
1364
1365 if (!partition_ok) {
1366 // Remember whether or not we were able to unmap the cow image.
1367 auto cow_image_device = GetCowImageDeviceName(name);
1368 has_mapped_cow_images |=
1369 (EnsureImageManager() && images_->IsImageMapped(cow_image_device));
1370
1371 ok = false;
1372 }
1373 }
1374
1375 if (ok || !has_mapped_cow_images) {
1376 // Delete any image artifacts as a precaution, in case an update is
1377 // being cancelled due to some corrupted state in an lp_metadata file.
1378 // Note that we do not do this if some cow images are still mapped,
1379 // since we must not remove backing storage if it's in use.
1380 if (!EnsureImageManager() || !images_->RemoveAllImages()) {
1381 LOG(ERROR) << "Could not remove all snapshot artifacts";
1382 return false;
1383 }
1384 }
1385 return ok;
1386 }
1387
1388 // See comments in RemoveAllSnapshots().
ShouldDeleteSnapshot(LockedFile * lock,const std::map<std::string,bool> & flashing_status,Slot current_slot,const std::string & name)1389 bool SnapshotManager::ShouldDeleteSnapshot(LockedFile* lock,
1390 const std::map<std::string, bool>& flashing_status,
1391 Slot current_slot, const std::string& name) {
1392 if (current_slot != Slot::Target) {
1393 return true;
1394 }
1395 auto it = flashing_status.find(name);
1396 if (it == flashing_status.end()) {
1397 LOG(WARNING) << "Can't determine flashing status for " << name;
1398 return true;
1399 }
1400 if (it->second) {
1401 // partition flashed, okay to delete obsolete snapshots
1402 return true;
1403 }
1404 // partition updated, only delete if not dm-snapshot
1405 SnapshotStatus status;
1406 if (!ReadSnapshotStatus(lock, name, &status)) {
1407 LOG(WARNING) << "Unable to read snapshot status for " << name
1408 << ", guessing snapshot device name";
1409 auto extra_name = GetSnapshotExtraDeviceName(name);
1410 return !IsSnapshotDevice(name) && !IsSnapshotDevice(extra_name);
1411 }
1412 auto dm_name = GetSnapshotDeviceName(name, status);
1413 return !IsSnapshotDevice(dm_name);
1414 }
1415
GetUpdateState(double * progress)1416 UpdateState SnapshotManager::GetUpdateState(double* progress) {
1417 // If we've never started an update, the state file won't exist.
1418 auto state_file = GetStateFilePath();
1419 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
1420 return UpdateState::None;
1421 }
1422
1423 auto lock = LockShared();
1424 if (!lock) {
1425 return UpdateState::None;
1426 }
1427
1428 SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
1429 auto state = update_status.state();
1430 if (progress == nullptr) {
1431 return state;
1432 }
1433
1434 if (state == UpdateState::MergeCompleted) {
1435 *progress = 100.0;
1436 return state;
1437 }
1438
1439 *progress = 0.0;
1440 if (state != UpdateState::Merging) {
1441 return state;
1442 }
1443
1444 // Sum all the snapshot states as if the system consists of a single huge
1445 // snapshots device, then compute the merge completion percentage of that
1446 // device.
1447 std::vector<std::string> snapshots;
1448 if (!ListSnapshots(lock.get(), &snapshots)) {
1449 LOG(ERROR) << "Could not list snapshots";
1450 return state;
1451 }
1452
1453 DmTargetSnapshot::Status fake_snapshots_status = {};
1454 for (const auto& snapshot : snapshots) {
1455 DmTargetSnapshot::Status current_status;
1456
1457 if (!QuerySnapshotStatus(snapshot, nullptr, ¤t_status)) continue;
1458
1459 fake_snapshots_status.sectors_allocated += current_status.sectors_allocated;
1460 fake_snapshots_status.total_sectors += current_status.total_sectors;
1461 fake_snapshots_status.metadata_sectors += current_status.metadata_sectors;
1462 }
1463
1464 *progress = DmTargetSnapshot::MergePercent(fake_snapshots_status,
1465 update_status.sectors_allocated());
1466
1467 return state;
1468 }
1469
ListSnapshots(LockedFile * lock,std::vector<std::string> * snapshots)1470 bool SnapshotManager::ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots) {
1471 CHECK(lock);
1472
1473 auto dir_path = metadata_dir_ + "/snapshots"s;
1474 std::unique_ptr<DIR, decltype(&closedir)> dir(opendir(dir_path.c_str()), closedir);
1475 if (!dir) {
1476 PLOG(ERROR) << "opendir failed: " << dir_path;
1477 return false;
1478 }
1479
1480 struct dirent* dp;
1481 while ((dp = readdir(dir.get())) != nullptr) {
1482 if (dp->d_type != DT_REG) continue;
1483 snapshots->emplace_back(dp->d_name);
1484 }
1485 return true;
1486 }
1487
IsSnapshotManagerNeeded()1488 bool SnapshotManager::IsSnapshotManagerNeeded() {
1489 return access(kBootIndicatorPath, F_OK) == 0;
1490 }
1491
GetGlobalRollbackIndicatorPath()1492 std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
1493 return kRollbackIndicatorPath;
1494 }
1495
NeedSnapshotsInFirstStageMount()1496 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
1497 // If we fail to read, we'll wind up using CreateLogicalPartitions, which
1498 // will create devices that look like the old slot, except with extra
1499 // content at the end of each device. This will confuse dm-verity, and
1500 // ultimately we'll fail to boot. Why not make it a fatal error and have
1501 // the reason be clearer? Because the indicator file still exists, and
1502 // if this was FATAL, reverting to the old slot would be broken.
1503 auto slot = GetCurrentSlot();
1504
1505 if (slot != Slot::Target) {
1506 if (slot == Slot::Source) {
1507 // Device is rebooting into the original slot, so mark this as a
1508 // rollback.
1509 auto path = GetRollbackIndicatorPath();
1510 if (!android::base::WriteStringToFile("1", path)) {
1511 PLOG(ERROR) << "Unable to write rollback indicator: " << path;
1512 } else {
1513 LOG(INFO) << "Rollback detected, writing rollback indicator to " << path;
1514 }
1515 }
1516 LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
1517 return false;
1518 }
1519
1520 // If we can't read the update state, it's unlikely anything else will
1521 // succeed, so this is a fatal error. We'll eventually exhaust boot
1522 // attempts and revert to the old slot.
1523 auto lock = LockShared();
1524 if (!lock) {
1525 LOG(FATAL) << "Could not read update state to determine snapshot status";
1526 return false;
1527 }
1528 switch (ReadUpdateState(lock.get())) {
1529 case UpdateState::Unverified:
1530 case UpdateState::Merging:
1531 case UpdateState::MergeFailed:
1532 return true;
1533 default:
1534 return false;
1535 }
1536 }
1537
CreateLogicalAndSnapshotPartitions(const std::string & super_device,const std::chrono::milliseconds & timeout_ms)1538 bool SnapshotManager::CreateLogicalAndSnapshotPartitions(
1539 const std::string& super_device, const std::chrono::milliseconds& timeout_ms) {
1540 LOG(INFO) << "Creating logical partitions with snapshots as needed";
1541
1542 auto lock = LockExclusive();
1543 if (!lock) return false;
1544
1545 const auto& opener = device_->GetPartitionOpener();
1546 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
1547 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
1548 if (!metadata) {
1549 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
1550 return false;
1551 }
1552
1553 for (const auto& partition : metadata->partitions) {
1554 if (GetPartitionGroupName(metadata->groups[partition.group_index]) == kCowGroupName) {
1555 LOG(INFO) << "Skip mapping partition " << GetPartitionName(partition) << " in group "
1556 << kCowGroupName;
1557 continue;
1558 }
1559
1560 CreateLogicalPartitionParams params = {
1561 .block_device = super_device,
1562 .metadata = metadata.get(),
1563 .partition = &partition,
1564 .partition_opener = &opener,
1565 .timeout_ms = timeout_ms,
1566 };
1567 std::string ignore_path;
1568 if (!MapPartitionWithSnapshot(lock.get(), std::move(params), &ignore_path)) {
1569 return false;
1570 }
1571 }
1572
1573 LOG(INFO) << "Created logical partitions with snapshot.";
1574 return true;
1575 }
1576
GetRemainingTime(const std::chrono::milliseconds & timeout,const std::chrono::time_point<std::chrono::steady_clock> & begin)1577 static std::chrono::milliseconds GetRemainingTime(
1578 const std::chrono::milliseconds& timeout,
1579 const std::chrono::time_point<std::chrono::steady_clock>& begin) {
1580 // If no timeout is specified, execute all commands without specifying any timeout.
1581 if (timeout.count() == 0) return std::chrono::milliseconds(0);
1582 auto passed_time = std::chrono::steady_clock::now() - begin;
1583 auto remaining_time = timeout - duration_cast<std::chrono::milliseconds>(passed_time);
1584 if (remaining_time.count() <= 0) {
1585 LOG(ERROR) << "MapPartitionWithSnapshot has reached timeout " << timeout.count() << "ms ("
1586 << remaining_time.count() << "ms remaining)";
1587 // Return min() instead of remaining_time here because 0 is treated as a special value for
1588 // no timeout, where the rest of the commands will still be executed.
1589 return std::chrono::milliseconds::min();
1590 }
1591 return remaining_time;
1592 }
1593
MapPartitionWithSnapshot(LockedFile * lock,CreateLogicalPartitionParams params,std::string * path)1594 bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
1595 CreateLogicalPartitionParams params,
1596 std::string* path) {
1597 auto begin = std::chrono::steady_clock::now();
1598
1599 CHECK(lock);
1600 path->clear();
1601
1602 if (params.GetPartitionName() != params.GetDeviceName()) {
1603 LOG(ERROR) << "Mapping snapshot with a different name is unsupported: partition_name = "
1604 << params.GetPartitionName() << ", device_name = " << params.GetDeviceName();
1605 return false;
1606 }
1607
1608 // Fill out fields in CreateLogicalPartitionParams so that we have more information (e.g. by
1609 // reading super partition metadata).
1610 CreateLogicalPartitionParams::OwnedData params_owned_data;
1611 if (!params.InitDefaults(¶ms_owned_data)) {
1612 return false;
1613 }
1614
1615 if (!params.partition->num_extents) {
1616 LOG(INFO) << "Skipping zero-length logical partition: " << params.GetPartitionName();
1617 return true; // leave path empty to indicate that nothing is mapped.
1618 }
1619
1620 // Determine if there is a live snapshot for the SnapshotStatus of the partition; i.e. if the
1621 // partition still has a snapshot that needs to be mapped. If no live snapshot or merge
1622 // completed, live_snapshot_status is set to nullopt.
1623 std::optional<SnapshotStatus> live_snapshot_status;
1624 do {
1625 if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
1626 LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
1627 << params.GetPartitionName();
1628 break;
1629 }
1630 auto file_path = GetSnapshotStatusFilePath(params.GetPartitionName());
1631 if (access(file_path.c_str(), F_OK) != 0) {
1632 if (errno != ENOENT) {
1633 PLOG(INFO) << "Can't map snapshot for " << params.GetPartitionName()
1634 << ": Can't access " << file_path;
1635 return false;
1636 }
1637 break;
1638 }
1639 live_snapshot_status = std::make_optional<SnapshotStatus>();
1640 if (!ReadSnapshotStatus(lock, params.GetPartitionName(), &*live_snapshot_status)) {
1641 return false;
1642 }
1643 // No live snapshot if merge is completed.
1644 if (live_snapshot_status->state() == SnapshotState::MERGE_COMPLETED) {
1645 live_snapshot_status.reset();
1646 }
1647
1648 if (live_snapshot_status->state() == SnapshotState::NONE ||
1649 live_snapshot_status->cow_partition_size() + live_snapshot_status->cow_file_size() ==
1650 0) {
1651 LOG(WARNING) << "Snapshot status for " << params.GetPartitionName()
1652 << " is invalid, ignoring: state = "
1653 << SnapshotState_Name(live_snapshot_status->state())
1654 << ", cow_partition_size = " << live_snapshot_status->cow_partition_size()
1655 << ", cow_file_size = " << live_snapshot_status->cow_file_size();
1656 live_snapshot_status.reset();
1657 }
1658 } while (0);
1659
1660 if (live_snapshot_status.has_value()) {
1661 // dm-snapshot requires the base device to be writable.
1662 params.force_writable = true;
1663 // Map the base device with a different name to avoid collision.
1664 params.device_name = GetBaseDeviceName(params.GetPartitionName());
1665 }
1666
1667 AutoDeviceList created_devices;
1668
1669 // Create the base device for the snapshot, or if there is no snapshot, the
1670 // device itself. This device consists of the real blocks in the super
1671 // partition that this logical partition occupies.
1672 auto& dm = DeviceMapper::Instance();
1673 std::string base_path;
1674 if (!CreateLogicalPartition(params, &base_path)) {
1675 LOG(ERROR) << "Could not create logical partition " << params.GetPartitionName()
1676 << " as device " << params.GetDeviceName();
1677 return false;
1678 }
1679 created_devices.EmplaceBack<AutoUnmapDevice>(&dm, params.GetDeviceName());
1680
1681 if (!live_snapshot_status.has_value()) {
1682 *path = base_path;
1683 created_devices.Release();
1684 return true;
1685 }
1686
1687 // We don't have ueventd in first-stage init, so use device major:minor
1688 // strings instead.
1689 std::string base_device;
1690 if (!dm.GetDeviceString(params.GetDeviceName(), &base_device)) {
1691 LOG(ERROR) << "Could not determine major/minor for: " << params.GetDeviceName();
1692 return false;
1693 }
1694
1695 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1696 if (remaining_time.count() < 0) return false;
1697
1698 std::string cow_name;
1699 CreateLogicalPartitionParams cow_params = params;
1700 cow_params.timeout_ms = remaining_time;
1701 if (!MapCowDevices(lock, cow_params, *live_snapshot_status, &created_devices, &cow_name)) {
1702 return false;
1703 }
1704 std::string cow_device;
1705 if (!GetMappedImageDeviceStringOrPath(cow_name, &cow_device)) {
1706 LOG(ERROR) << "Could not determine major/minor for: " << cow_name;
1707 return false;
1708 }
1709
1710 remaining_time = GetRemainingTime(params.timeout_ms, begin);
1711 if (remaining_time.count() < 0) return false;
1712
1713 if (!MapSnapshot(lock, params.GetPartitionName(), base_device, cow_device, remaining_time,
1714 path)) {
1715 LOG(ERROR) << "Could not map snapshot for partition: " << params.GetPartitionName();
1716 return false;
1717 }
1718 // No need to add params.GetPartitionName() to created_devices since it is immediately released.
1719
1720 created_devices.Release();
1721
1722 LOG(INFO) << "Mapped " << params.GetPartitionName() << " as snapshot device at " << *path;
1723
1724 return true;
1725 }
1726
UnmapPartitionWithSnapshot(LockedFile * lock,const std::string & target_partition_name)1727 bool SnapshotManager::UnmapPartitionWithSnapshot(LockedFile* lock,
1728 const std::string& target_partition_name) {
1729 CHECK(lock);
1730
1731 if (!UnmapSnapshot(lock, target_partition_name)) {
1732 return false;
1733 }
1734
1735 if (!UnmapCowDevices(lock, target_partition_name)) {
1736 return false;
1737 }
1738
1739 auto& dm = DeviceMapper::Instance();
1740 std::string base_name = GetBaseDeviceName(target_partition_name);
1741 if (!dm.DeleteDeviceIfExists(base_name)) {
1742 LOG(ERROR) << "Cannot delete base device: " << base_name;
1743 return false;
1744 }
1745
1746 LOG(INFO) << "Successfully unmapped snapshot " << target_partition_name;
1747
1748 return true;
1749 }
1750
MapCowDevices(LockedFile * lock,const CreateLogicalPartitionParams & params,const SnapshotStatus & snapshot_status,AutoDeviceList * created_devices,std::string * cow_name)1751 bool SnapshotManager::MapCowDevices(LockedFile* lock, const CreateLogicalPartitionParams& params,
1752 const SnapshotStatus& snapshot_status,
1753 AutoDeviceList* created_devices, std::string* cow_name) {
1754 CHECK(lock);
1755 CHECK(snapshot_status.cow_partition_size() + snapshot_status.cow_file_size() > 0);
1756 auto begin = std::chrono::steady_clock::now();
1757
1758 std::string partition_name = params.GetPartitionName();
1759 std::string cow_image_name = GetCowImageDeviceName(partition_name);
1760 *cow_name = GetCowName(partition_name);
1761
1762 auto& dm = DeviceMapper::Instance();
1763
1764 // Map COW image if necessary.
1765 if (snapshot_status.cow_file_size() > 0) {
1766 if (!EnsureImageManager()) return false;
1767 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1768 if (remaining_time.count() < 0) return false;
1769
1770 if (!MapCowImage(partition_name, remaining_time).has_value()) {
1771 LOG(ERROR) << "Could not map cow image for partition: " << partition_name;
1772 return false;
1773 }
1774 created_devices->EmplaceBack<AutoUnmapImage>(images_.get(), cow_image_name);
1775
1776 // If no COW partition exists, just return the image alone.
1777 if (snapshot_status.cow_partition_size() == 0) {
1778 *cow_name = std::move(cow_image_name);
1779 LOG(INFO) << "Mapped COW image for " << partition_name << " at " << *cow_name;
1780 return true;
1781 }
1782 }
1783
1784 auto remaining_time = GetRemainingTime(params.timeout_ms, begin);
1785 if (remaining_time.count() < 0) return false;
1786
1787 CHECK(snapshot_status.cow_partition_size() > 0);
1788
1789 // Create the DmTable for the COW device. It is the DmTable of the COW partition plus
1790 // COW image device as the last extent.
1791 CreateLogicalPartitionParams cow_partition_params = params;
1792 cow_partition_params.partition = nullptr;
1793 cow_partition_params.partition_name = *cow_name;
1794 cow_partition_params.device_name.clear();
1795 DmTable table;
1796 if (!CreateDmTable(cow_partition_params, &table)) {
1797 return false;
1798 }
1799 // If the COW image exists, append it as the last extent.
1800 if (snapshot_status.cow_file_size() > 0) {
1801 std::string cow_image_device;
1802 if (!GetMappedImageDeviceStringOrPath(cow_image_name, &cow_image_device)) {
1803 LOG(ERROR) << "Cannot determine major/minor for: " << cow_image_name;
1804 return false;
1805 }
1806 auto cow_partition_sectors = snapshot_status.cow_partition_size() / kSectorSize;
1807 auto cow_image_sectors = snapshot_status.cow_file_size() / kSectorSize;
1808 table.Emplace<DmTargetLinear>(cow_partition_sectors, cow_image_sectors, cow_image_device,
1809 0);
1810 }
1811
1812 // We have created the DmTable now. Map it.
1813 std::string cow_path;
1814 if (!dm.CreateDevice(*cow_name, table, &cow_path, remaining_time)) {
1815 LOG(ERROR) << "Could not create COW device: " << *cow_name;
1816 return false;
1817 }
1818 created_devices->EmplaceBack<AutoUnmapDevice>(&dm, *cow_name);
1819 LOG(INFO) << "Mapped COW device for " << params.GetPartitionName() << " at " << cow_path;
1820 return true;
1821 }
1822
UnmapCowDevices(LockedFile * lock,const std::string & name)1823 bool SnapshotManager::UnmapCowDevices(LockedFile* lock, const std::string& name) {
1824 CHECK(lock);
1825 if (!EnsureImageManager()) return false;
1826
1827 auto& dm = DeviceMapper::Instance();
1828 auto cow_name = GetCowName(name);
1829 if (!dm.DeleteDeviceIfExists(cow_name)) {
1830 LOG(ERROR) << "Cannot unmap " << cow_name;
1831 return false;
1832 }
1833
1834 std::string cow_image_name = GetCowImageDeviceName(name);
1835 if (!images_->UnmapImageIfExists(cow_image_name)) {
1836 LOG(ERROR) << "Cannot unmap image " << cow_image_name;
1837 return false;
1838 }
1839 return true;
1840 }
1841
OpenFile(const std::string & file,int lock_flags)1842 auto SnapshotManager::OpenFile(const std::string& file, int lock_flags)
1843 -> std::unique_ptr<LockedFile> {
1844 unique_fd fd(open(file.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
1845 if (fd < 0) {
1846 PLOG(ERROR) << "Open failed: " << file;
1847 return nullptr;
1848 }
1849 if (lock_flags != 0 && TEMP_FAILURE_RETRY(flock(fd, lock_flags)) < 0) {
1850 PLOG(ERROR) << "Acquire flock failed: " << file;
1851 return nullptr;
1852 }
1853 // For simplicity, we want to CHECK that lock_mode == LOCK_EX, in some
1854 // calls, so strip extra flags.
1855 int lock_mode = lock_flags & (LOCK_EX | LOCK_SH);
1856 return std::make_unique<LockedFile>(file, std::move(fd), lock_mode);
1857 }
1858
~LockedFile()1859 SnapshotManager::LockedFile::~LockedFile() {
1860 if (TEMP_FAILURE_RETRY(flock(fd_, LOCK_UN)) < 0) {
1861 PLOG(ERROR) << "Failed to unlock file: " << path_;
1862 }
1863 }
1864
GetStateFilePath() const1865 std::string SnapshotManager::GetStateFilePath() const {
1866 return metadata_dir_ + "/state"s;
1867 }
1868
GetMergeStateFilePath() const1869 std::string SnapshotManager::GetMergeStateFilePath() const {
1870 return metadata_dir_ + "/merge_state"s;
1871 }
1872
GetLockPath() const1873 std::string SnapshotManager::GetLockPath() const {
1874 return metadata_dir_;
1875 }
1876
OpenLock(int lock_flags)1877 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::OpenLock(int lock_flags) {
1878 auto lock_file = GetLockPath();
1879 return OpenFile(lock_file, lock_flags);
1880 }
1881
LockShared()1882 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockShared() {
1883 return OpenLock(LOCK_SH);
1884 }
1885
LockExclusive()1886 std::unique_ptr<SnapshotManager::LockedFile> SnapshotManager::LockExclusive() {
1887 return OpenLock(LOCK_EX);
1888 }
1889
UpdateStateFromString(const std::string & contents)1890 static UpdateState UpdateStateFromString(const std::string& contents) {
1891 if (contents.empty() || contents == "none") {
1892 return UpdateState::None;
1893 } else if (contents == "initiated") {
1894 return UpdateState::Initiated;
1895 } else if (contents == "unverified") {
1896 return UpdateState::Unverified;
1897 } else if (contents == "merging") {
1898 return UpdateState::Merging;
1899 } else if (contents == "merge-completed") {
1900 return UpdateState::MergeCompleted;
1901 } else if (contents == "merge-needs-reboot") {
1902 return UpdateState::MergeNeedsReboot;
1903 } else if (contents == "merge-failed") {
1904 return UpdateState::MergeFailed;
1905 } else if (contents == "cancelled") {
1906 return UpdateState::Cancelled;
1907 } else {
1908 LOG(ERROR) << "Unknown merge state in update state file: \"" << contents << "\"";
1909 return UpdateState::None;
1910 }
1911 }
1912
operator <<(std::ostream & os,UpdateState state)1913 std::ostream& operator<<(std::ostream& os, UpdateState state) {
1914 switch (state) {
1915 case UpdateState::None:
1916 return os << "none";
1917 case UpdateState::Initiated:
1918 return os << "initiated";
1919 case UpdateState::Unverified:
1920 return os << "unverified";
1921 case UpdateState::Merging:
1922 return os << "merging";
1923 case UpdateState::MergeCompleted:
1924 return os << "merge-completed";
1925 case UpdateState::MergeNeedsReboot:
1926 return os << "merge-needs-reboot";
1927 case UpdateState::MergeFailed:
1928 return os << "merge-failed";
1929 case UpdateState::Cancelled:
1930 return os << "cancelled";
1931 default:
1932 LOG(ERROR) << "Unknown update state: " << static_cast<uint32_t>(state);
1933 return os;
1934 }
1935 }
1936
ReadUpdateState(LockedFile * lock)1937 UpdateState SnapshotManager::ReadUpdateState(LockedFile* lock) {
1938 SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock);
1939 return status.state();
1940 }
1941
ReadSnapshotUpdateStatus(LockedFile * lock)1942 SnapshotUpdateStatus SnapshotManager::ReadSnapshotUpdateStatus(LockedFile* lock) {
1943 CHECK(lock);
1944
1945 SnapshotUpdateStatus status = {};
1946 std::string contents;
1947 if (!android::base::ReadFileToString(GetStateFilePath(), &contents)) {
1948 PLOG(ERROR) << "Read state file failed";
1949 status.set_state(UpdateState::None);
1950 return status;
1951 }
1952
1953 if (!status.ParseFromString(contents)) {
1954 LOG(WARNING) << "Unable to parse state file as SnapshotUpdateStatus, using the old format";
1955
1956 // Try to rollback to legacy file to support devices that are
1957 // currently using the old file format.
1958 // TODO(b/147409432)
1959 status.set_state(UpdateStateFromString(contents));
1960 }
1961
1962 return status;
1963 }
1964
WriteUpdateState(LockedFile * lock,UpdateState state)1965 bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
1966 SnapshotUpdateStatus status = {};
1967 status.set_state(state);
1968 return WriteSnapshotUpdateStatus(lock, status);
1969 }
1970
WriteSnapshotUpdateStatus(LockedFile * lock,const SnapshotUpdateStatus & status)1971 bool SnapshotManager::WriteSnapshotUpdateStatus(LockedFile* lock,
1972 const SnapshotUpdateStatus& status) {
1973 CHECK(lock);
1974 CHECK(lock->lock_mode() == LOCK_EX);
1975
1976 std::string contents;
1977 if (!status.SerializeToString(&contents)) {
1978 LOG(ERROR) << "Unable to serialize SnapshotUpdateStatus.";
1979 return false;
1980 }
1981
1982 #ifdef LIBSNAPSHOT_USE_HAL
1983 auto merge_status = MergeStatus::UNKNOWN;
1984 switch (status.state()) {
1985 // The needs-reboot and completed cases imply that /data and /metadata
1986 // can be safely wiped, so we don't report a merge status.
1987 case UpdateState::None:
1988 case UpdateState::MergeNeedsReboot:
1989 case UpdateState::MergeCompleted:
1990 case UpdateState::Initiated:
1991 merge_status = MergeStatus::NONE;
1992 break;
1993 case UpdateState::Unverified:
1994 merge_status = MergeStatus::SNAPSHOTTED;
1995 break;
1996 case UpdateState::Merging:
1997 case UpdateState::MergeFailed:
1998 merge_status = MergeStatus::MERGING;
1999 break;
2000 default:
2001 // Note that Cancelled flows to here - it is never written, since
2002 // it only communicates a transient state to the caller.
2003 LOG(ERROR) << "Unexpected update status: " << status.state();
2004 break;
2005 }
2006
2007 bool set_before_write =
2008 merge_status == MergeStatus::SNAPSHOTTED || merge_status == MergeStatus::MERGING;
2009 if (set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2010 return false;
2011 }
2012 #endif
2013
2014 if (!WriteStringToFileAtomic(contents, GetStateFilePath())) {
2015 PLOG(ERROR) << "Could not write to state file";
2016 return false;
2017 }
2018
2019 #ifdef LIBSNAPSHOT_USE_HAL
2020 if (!set_before_write && !device_->SetBootControlMergeStatus(merge_status)) {
2021 return false;
2022 }
2023 #endif
2024 return true;
2025 }
2026
GetSnapshotStatusFilePath(const std::string & name)2027 std::string SnapshotManager::GetSnapshotStatusFilePath(const std::string& name) {
2028 auto file = metadata_dir_ + "/snapshots/"s + name;
2029 return file;
2030 }
2031
ReadSnapshotStatus(LockedFile * lock,const std::string & name,SnapshotStatus * status)2032 bool SnapshotManager::ReadSnapshotStatus(LockedFile* lock, const std::string& name,
2033 SnapshotStatus* status) {
2034 CHECK(lock);
2035 auto path = GetSnapshotStatusFilePath(name);
2036
2037 unique_fd fd(open(path.c_str(), O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
2038 if (fd < 0) {
2039 PLOG(ERROR) << "Open failed: " << path;
2040 return false;
2041 }
2042
2043 if (!status->ParseFromFileDescriptor(fd.get())) {
2044 PLOG(ERROR) << "Unable to parse " << path << " as SnapshotStatus";
2045 return false;
2046 }
2047
2048 if (status->name() != name) {
2049 LOG(WARNING) << "Found snapshot status named " << status->name() << " in " << path;
2050 status->set_name(name);
2051 }
2052
2053 return true;
2054 }
2055
WriteSnapshotStatus(LockedFile * lock,const SnapshotStatus & status)2056 bool SnapshotManager::WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status) {
2057 // The caller must take an exclusive lock to modify snapshots.
2058 CHECK(lock);
2059 CHECK(lock->lock_mode() == LOCK_EX);
2060 CHECK(!status.name().empty());
2061
2062 auto path = GetSnapshotStatusFilePath(status.name());
2063
2064 std::string content;
2065 if (!status.SerializeToString(&content)) {
2066 LOG(ERROR) << "Unable to serialize SnapshotStatus for " << status.name();
2067 return false;
2068 }
2069
2070 if (!WriteStringToFileAtomic(content, path)) {
2071 PLOG(ERROR) << "Unable to write SnapshotStatus to " << path;
2072 return false;
2073 }
2074
2075 return true;
2076 }
2077
GetSnapshotDeviceName(const std::string & snapshot_name,const SnapshotStatus & status)2078 std::string SnapshotManager::GetSnapshotDeviceName(const std::string& snapshot_name,
2079 const SnapshotStatus& status) {
2080 if (status.device_size() != status.snapshot_size()) {
2081 return GetSnapshotExtraDeviceName(snapshot_name);
2082 }
2083 return snapshot_name;
2084 }
2085
EnsureImageManager()2086 bool SnapshotManager::EnsureImageManager() {
2087 if (images_) return true;
2088
2089 // For now, use a preset timeout.
2090 images_ = android::fiemap::IImageManager::Open(gsid_dir_, 15000ms);
2091 if (!images_) {
2092 LOG(ERROR) << "Could not open ImageManager";
2093 return false;
2094 }
2095 return true;
2096 }
2097
ForceLocalImageManager()2098 bool SnapshotManager::ForceLocalImageManager() {
2099 images_ = android::fiemap::ImageManager::Open(gsid_dir_);
2100 if (!images_) {
2101 LOG(ERROR) << "Could not open ImageManager";
2102 return false;
2103 }
2104 has_local_image_manager_ = true;
2105 return true;
2106 }
2107
UnmapAndDeleteCowPartition(MetadataBuilder * current_metadata)2108 static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
2109 auto& dm = DeviceMapper::Instance();
2110 std::vector<std::string> to_delete;
2111 for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
2112 if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
2113 LOG(WARNING) << existing_cow_partition->name()
2114 << " cannot be unmapped and its space cannot be reclaimed";
2115 continue;
2116 }
2117 to_delete.push_back(existing_cow_partition->name());
2118 }
2119 for (const auto& name : to_delete) {
2120 current_metadata->RemovePartition(name);
2121 }
2122 }
2123
AddRequiredSpace(Return orig,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2124 static Return AddRequiredSpace(Return orig,
2125 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2126 if (orig.error_code() != Return::ErrorCode::NO_SPACE) {
2127 return orig;
2128 }
2129 uint64_t sum = 0;
2130 for (auto&& [name, status] : all_snapshot_status) {
2131 sum += status.cow_file_size();
2132 }
2133 return Return::NoSpace(sum);
2134 }
2135
CreateUpdateSnapshots(const DeltaArchiveManifest & manifest)2136 Return SnapshotManager::CreateUpdateSnapshots(const DeltaArchiveManifest& manifest) {
2137 auto lock = LockExclusive();
2138 if (!lock) return Return::Error();
2139
2140 // TODO(b/134949511): remove this check. Right now, with overlayfs mounted, the scratch
2141 // partition takes up a big chunk of space in super, causing COW images to be created on
2142 // retrofit Virtual A/B devices.
2143 if (device_->IsOverlayfsSetup()) {
2144 LOG(ERROR) << "Cannot create update snapshots with overlayfs setup. Run `adb enable-verity`"
2145 << ", reboot, then try again.";
2146 return Return::Error();
2147 }
2148
2149 const auto& opener = device_->GetPartitionOpener();
2150 auto current_suffix = device_->GetSlotSuffix();
2151 uint32_t current_slot = SlotNumberForSlotSuffix(current_suffix);
2152 auto target_suffix = device_->GetOtherSlotSuffix();
2153 uint32_t target_slot = SlotNumberForSlotSuffix(target_suffix);
2154 auto current_super = device_->GetSuperDevice(current_slot);
2155
2156 auto current_metadata = MetadataBuilder::New(opener, current_super, current_slot);
2157 if (current_metadata == nullptr) {
2158 LOG(ERROR) << "Cannot create metadata builder.";
2159 return Return::Error();
2160 }
2161
2162 auto target_metadata =
2163 MetadataBuilder::NewForUpdate(opener, current_super, current_slot, target_slot);
2164 if (target_metadata == nullptr) {
2165 LOG(ERROR) << "Cannot create target metadata builder.";
2166 return Return::Error();
2167 }
2168
2169 // Delete partitions with target suffix in |current_metadata|. Otherwise,
2170 // partition_cow_creator recognizes these left-over partitions as used space.
2171 for (const auto& group_name : current_metadata->ListGroups()) {
2172 if (android::base::EndsWith(group_name, target_suffix)) {
2173 current_metadata->RemoveGroupAndPartitions(group_name);
2174 }
2175 }
2176
2177 SnapshotMetadataUpdater metadata_updater(target_metadata.get(), target_slot, manifest);
2178 if (!metadata_updater.Update()) {
2179 LOG(ERROR) << "Cannot calculate new metadata.";
2180 return Return::Error();
2181 }
2182
2183 // Delete previous COW partitions in current_metadata so that PartitionCowCreator marks those as
2184 // free regions.
2185 UnmapAndDeleteCowPartition(current_metadata.get());
2186
2187 // Check that all these metadata is not retrofit dynamic partitions. Snapshots on
2188 // devices with retrofit dynamic partitions does not make sense.
2189 // This ensures that current_metadata->GetFreeRegions() uses the same device
2190 // indices as target_metadata (i.e. 0 -> "super").
2191 // This is also assumed in MapCowDevices() call below.
2192 CHECK(current_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME &&
2193 target_metadata->GetBlockDevicePartitionName(0) == LP_METADATA_DEFAULT_PARTITION_NAME);
2194
2195 std::map<std::string, SnapshotStatus> all_snapshot_status;
2196
2197 // In case of error, automatically delete devices that are created along the way.
2198 // Note that "lock" is destroyed after "created_devices", so it is safe to use |lock| for
2199 // these devices.
2200 AutoDeviceList created_devices;
2201
2202 PartitionCowCreator cow_creator{
2203 .target_metadata = target_metadata.get(),
2204 .target_suffix = target_suffix,
2205 .target_partition = nullptr,
2206 .current_metadata = current_metadata.get(),
2207 .current_suffix = current_suffix,
2208 .operations = nullptr,
2209 .extra_extents = {},
2210 };
2211
2212 auto ret = CreateUpdateSnapshotsInternal(lock.get(), manifest, &cow_creator, &created_devices,
2213 &all_snapshot_status);
2214 if (!ret.is_ok()) return ret;
2215
2216 auto exported_target_metadata = target_metadata->Export();
2217 if (exported_target_metadata == nullptr) {
2218 LOG(ERROR) << "Cannot export target metadata";
2219 return Return::Error();
2220 }
2221
2222 ret = InitializeUpdateSnapshots(lock.get(), target_metadata.get(),
2223 exported_target_metadata.get(), target_suffix,
2224 all_snapshot_status);
2225 if (!ret.is_ok()) return ret;
2226
2227 if (!UpdatePartitionTable(opener, device_->GetSuperDevice(target_slot),
2228 *exported_target_metadata, target_slot)) {
2229 LOG(ERROR) << "Cannot write target metadata";
2230 return Return::Error();
2231 }
2232
2233 created_devices.Release();
2234 LOG(INFO) << "Successfully created all snapshots for target slot " << target_suffix;
2235
2236 return Return::Ok();
2237 }
2238
CreateUpdateSnapshotsInternal(LockedFile * lock,const DeltaArchiveManifest & manifest,PartitionCowCreator * cow_creator,AutoDeviceList * created_devices,std::map<std::string,SnapshotStatus> * all_snapshot_status)2239 Return SnapshotManager::CreateUpdateSnapshotsInternal(
2240 LockedFile* lock, const DeltaArchiveManifest& manifest, PartitionCowCreator* cow_creator,
2241 AutoDeviceList* created_devices,
2242 std::map<std::string, SnapshotStatus>* all_snapshot_status) {
2243 CHECK(lock);
2244
2245 auto* target_metadata = cow_creator->target_metadata;
2246 const auto& target_suffix = cow_creator->target_suffix;
2247
2248 if (!target_metadata->AddGroup(kCowGroupName, 0)) {
2249 LOG(ERROR) << "Cannot add group " << kCowGroupName;
2250 return Return::Error();
2251 }
2252
2253 std::map<std::string, const RepeatedPtrField<InstallOperation>*> install_operation_map;
2254 std::map<std::string, std::vector<Extent>> extra_extents_map;
2255 for (const auto& partition_update : manifest.partitions()) {
2256 auto suffixed_name = partition_update.partition_name() + target_suffix;
2257 auto&& [it, inserted] =
2258 install_operation_map.emplace(suffixed_name, &partition_update.operations());
2259 if (!inserted) {
2260 LOG(ERROR) << "Duplicated partition " << partition_update.partition_name()
2261 << " in update manifest.";
2262 return Return::Error();
2263 }
2264
2265 auto& extra_extents = extra_extents_map[suffixed_name];
2266 if (partition_update.has_hash_tree_extent()) {
2267 extra_extents.push_back(partition_update.hash_tree_extent());
2268 }
2269 if (partition_update.has_fec_extent()) {
2270 extra_extents.push_back(partition_update.fec_extent());
2271 }
2272 }
2273
2274 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2275 cow_creator->target_partition = target_partition;
2276 cow_creator->operations = nullptr;
2277 auto operations_it = install_operation_map.find(target_partition->name());
2278 if (operations_it != install_operation_map.end()) {
2279 cow_creator->operations = operations_it->second;
2280 }
2281
2282 cow_creator->extra_extents.clear();
2283 auto extra_extents_it = extra_extents_map.find(target_partition->name());
2284 if (extra_extents_it != extra_extents_map.end()) {
2285 cow_creator->extra_extents = std::move(extra_extents_it->second);
2286 }
2287
2288 // Compute the device sizes for the partition.
2289 auto cow_creator_ret = cow_creator->Run();
2290 if (!cow_creator_ret.has_value()) {
2291 return Return::Error();
2292 }
2293
2294 LOG(INFO) << "For partition " << target_partition->name()
2295 << ", device size = " << cow_creator_ret->snapshot_status.device_size()
2296 << ", snapshot size = " << cow_creator_ret->snapshot_status.snapshot_size()
2297 << ", cow partition size = "
2298 << cow_creator_ret->snapshot_status.cow_partition_size()
2299 << ", cow file size = " << cow_creator_ret->snapshot_status.cow_file_size();
2300
2301 // Delete any existing snapshot before re-creating one.
2302 if (!DeleteSnapshot(lock, target_partition->name())) {
2303 LOG(ERROR) << "Cannot delete existing snapshot before creating a new one for partition "
2304 << target_partition->name();
2305 return Return::Error();
2306 }
2307
2308 // It is possible that the whole partition uses free space in super, and snapshot / COW
2309 // would not be needed. In this case, skip the partition.
2310 bool needs_snapshot = cow_creator_ret->snapshot_status.snapshot_size() > 0;
2311 bool needs_cow = (cow_creator_ret->snapshot_status.cow_partition_size() +
2312 cow_creator_ret->snapshot_status.cow_file_size()) > 0;
2313 CHECK(needs_snapshot == needs_cow);
2314
2315 if (!needs_snapshot) {
2316 LOG(INFO) << "Skip creating snapshot for partition " << target_partition->name()
2317 << "because nothing needs to be snapshotted.";
2318 continue;
2319 }
2320
2321 // Store these device sizes to snapshot status file.
2322 if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) {
2323 return Return::Error();
2324 }
2325 created_devices->EmplaceBack<AutoDeleteSnapshot>(this, lock, target_partition->name());
2326
2327 // Create the COW partition. That is, use any remaining free space in super partition before
2328 // creating the COW images.
2329 if (cow_creator_ret->snapshot_status.cow_partition_size() > 0) {
2330 CHECK(cow_creator_ret->snapshot_status.cow_partition_size() % kSectorSize == 0)
2331 << "cow_partition_size == "
2332 << cow_creator_ret->snapshot_status.cow_partition_size()
2333 << " is not a multiple of sector size " << kSectorSize;
2334 auto cow_partition = target_metadata->AddPartition(GetCowName(target_partition->name()),
2335 kCowGroupName, 0 /* flags */);
2336 if (cow_partition == nullptr) {
2337 return Return::Error();
2338 }
2339
2340 if (!target_metadata->ResizePartition(
2341 cow_partition, cow_creator_ret->snapshot_status.cow_partition_size(),
2342 cow_creator_ret->cow_partition_usable_regions)) {
2343 LOG(ERROR) << "Cannot create COW partition on metadata with size "
2344 << cow_creator_ret->snapshot_status.cow_partition_size();
2345 return Return::Error();
2346 }
2347 // Only the in-memory target_metadata is modified; nothing to clean up if there is an
2348 // error in the future.
2349 }
2350
2351 all_snapshot_status->emplace(target_partition->name(),
2352 std::move(cow_creator_ret->snapshot_status));
2353
2354 LOG(INFO) << "Successfully created snapshot partition for " << target_partition->name();
2355 }
2356
2357 LOG(INFO) << "Allocating CoW images.";
2358
2359 for (auto&& [name, snapshot_status] : *all_snapshot_status) {
2360 // Create the backing COW image if necessary.
2361 if (snapshot_status.cow_file_size() > 0) {
2362 auto ret = CreateCowImage(lock, name);
2363 if (!ret.is_ok()) return AddRequiredSpace(ret, *all_snapshot_status);
2364 }
2365
2366 LOG(INFO) << "Successfully created snapshot for " << name;
2367 }
2368
2369 return Return::Ok();
2370 }
2371
InitializeUpdateSnapshots(LockedFile * lock,MetadataBuilder * target_metadata,const LpMetadata * exported_target_metadata,const std::string & target_suffix,const std::map<std::string,SnapshotStatus> & all_snapshot_status)2372 Return SnapshotManager::InitializeUpdateSnapshots(
2373 LockedFile* lock, MetadataBuilder* target_metadata,
2374 const LpMetadata* exported_target_metadata, const std::string& target_suffix,
2375 const std::map<std::string, SnapshotStatus>& all_snapshot_status) {
2376 CHECK(lock);
2377
2378 CreateLogicalPartitionParams cow_params{
2379 .block_device = LP_METADATA_DEFAULT_PARTITION_NAME,
2380 .metadata = exported_target_metadata,
2381 .timeout_ms = std::chrono::milliseconds::max(),
2382 .partition_opener = &device_->GetPartitionOpener(),
2383 };
2384 for (auto* target_partition : ListPartitionsWithSuffix(target_metadata, target_suffix)) {
2385 AutoDeviceList created_devices_for_cow;
2386
2387 if (!UnmapPartitionWithSnapshot(lock, target_partition->name())) {
2388 LOG(ERROR) << "Cannot unmap existing COW devices before re-mapping them for zero-fill: "
2389 << target_partition->name();
2390 return Return::Error();
2391 }
2392
2393 auto it = all_snapshot_status.find(target_partition->name());
2394 if (it == all_snapshot_status.end()) continue;
2395 cow_params.partition_name = target_partition->name();
2396 std::string cow_name;
2397 if (!MapCowDevices(lock, cow_params, it->second, &created_devices_for_cow, &cow_name)) {
2398 return Return::Error();
2399 }
2400
2401 std::string cow_path;
2402 if (!images_->GetMappedImageDevice(cow_name, &cow_path)) {
2403 LOG(ERROR) << "Cannot determine path for " << cow_name;
2404 return Return::Error();
2405 }
2406
2407 auto ret = InitializeCow(cow_path);
2408 if (!ret.is_ok()) {
2409 LOG(ERROR) << "Can't zero-fill COW device for " << target_partition->name() << ": "
2410 << cow_path;
2411 return AddRequiredSpace(ret, all_snapshot_status);
2412 }
2413 // Let destructor of created_devices_for_cow to unmap the COW devices.
2414 };
2415 return Return::Ok();
2416 }
2417
MapUpdateSnapshot(const CreateLogicalPartitionParams & params,std::string * snapshot_path)2418 bool SnapshotManager::MapUpdateSnapshot(const CreateLogicalPartitionParams& params,
2419 std::string* snapshot_path) {
2420 auto lock = LockShared();
2421 if (!lock) return false;
2422 if (!UnmapPartitionWithSnapshot(lock.get(), params.GetPartitionName())) {
2423 LOG(ERROR) << "Cannot unmap existing snapshot before re-mapping it: "
2424 << params.GetPartitionName();
2425 return false;
2426 }
2427 return MapPartitionWithSnapshot(lock.get(), params, snapshot_path);
2428 }
2429
UnmapUpdateSnapshot(const std::string & target_partition_name)2430 bool SnapshotManager::UnmapUpdateSnapshot(const std::string& target_partition_name) {
2431 auto lock = LockShared();
2432 if (!lock) return false;
2433 return UnmapPartitionWithSnapshot(lock.get(), target_partition_name);
2434 }
2435
UnmapAllPartitions()2436 bool SnapshotManager::UnmapAllPartitions() {
2437 auto lock = LockExclusive();
2438 if (!lock) return false;
2439
2440 const auto& opener = device_->GetPartitionOpener();
2441 uint32_t slot = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2442 auto super_device = device_->GetSuperDevice(slot);
2443 auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, slot);
2444 if (!metadata) {
2445 LOG(ERROR) << "Could not read dynamic partition metadata for device: " << super_device;
2446 return false;
2447 }
2448
2449 bool ok = true;
2450 for (const auto& partition : metadata->partitions) {
2451 auto partition_name = GetPartitionName(partition);
2452 ok &= UnmapPartitionWithSnapshot(lock.get(), partition_name);
2453 }
2454 return ok;
2455 }
2456
operator <<(std::ostream & os,SnapshotManager::Slot slot)2457 std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
2458 switch (slot) {
2459 case SnapshotManager::Slot::Unknown:
2460 return os << "unknown";
2461 case SnapshotManager::Slot::Source:
2462 return os << "source";
2463 case SnapshotManager::Slot::Target:
2464 return os << "target";
2465 }
2466 }
2467
Dump(std::ostream & os)2468 bool SnapshotManager::Dump(std::ostream& os) {
2469 // Don't actually lock. Dump() is for debugging purposes only, so it is okay
2470 // if it is racy.
2471 auto file = OpenLock(0 /* lock flag */);
2472 if (!file) return false;
2473
2474 std::stringstream ss;
2475
2476 ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;
2477
2478 ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
2479 ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;
2480 ss << "Rollback indicator: "
2481 << (access(GetRollbackIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2482 << std::endl;
2483 ss << "Forward merge indicator: "
2484 << (access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0 ? "exists" : strerror(errno))
2485 << std::endl;
2486
2487 bool ok = true;
2488 std::vector<std::string> snapshots;
2489 if (!ListSnapshots(file.get(), &snapshots)) {
2490 LOG(ERROR) << "Could not list snapshots";
2491 snapshots.clear();
2492 ok = false;
2493 }
2494 for (const auto& name : snapshots) {
2495 ss << "Snapshot: " << name << std::endl;
2496 SnapshotStatus status;
2497 if (!ReadSnapshotStatus(file.get(), name, &status)) {
2498 ok = false;
2499 continue;
2500 }
2501 ss << " state: " << SnapshotState_Name(status.state()) << std::endl;
2502 ss << " device size (bytes): " << status.device_size() << std::endl;
2503 ss << " snapshot size (bytes): " << status.snapshot_size() << std::endl;
2504 ss << " cow partition size (bytes): " << status.cow_partition_size() << std::endl;
2505 ss << " cow file size (bytes): " << status.cow_file_size() << std::endl;
2506 ss << " allocated sectors: " << status.sectors_allocated() << std::endl;
2507 ss << " metadata sectors: " << status.metadata_sectors() << std::endl;
2508 }
2509 os << ss.rdbuf();
2510 return ok;
2511 }
2512
EnsureMetadataMounted()2513 std::unique_ptr<AutoDevice> SnapshotManager::EnsureMetadataMounted() {
2514 if (!device_->IsRecovery()) {
2515 // No need to mount anything in recovery.
2516 LOG(INFO) << "EnsureMetadataMounted does nothing in Android mode.";
2517 return std::unique_ptr<AutoUnmountDevice>(new AutoUnmountDevice());
2518 }
2519 auto ret = AutoUnmountDevice::New(device_->GetMetadataDir());
2520 if (ret == nullptr) return nullptr;
2521
2522 // In rescue mode, it is possible to erase and format metadata, but /metadata/ota is not
2523 // created to execute snapshot updates. Hence, subsequent calls is likely to fail because
2524 // Lock*() fails. By failing early and returning nullptr here, update_engine_sideload can
2525 // treat this case as if /metadata is not mounted.
2526 if (!LockShared()) {
2527 LOG(WARNING) << "/metadata is mounted, but errors occur when acquiring a shared lock. "
2528 "Subsequent calls to SnapshotManager will fail. Unmounting /metadata now.";
2529 return nullptr;
2530 }
2531 return ret;
2532 }
2533
HandleImminentDataWipe(const std::function<void ()> & callback)2534 bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
2535 if (!device_->IsRecovery()) {
2536 LOG(ERROR) << "Data wipes are only allowed in recovery.";
2537 return false;
2538 }
2539
2540 auto mount = EnsureMetadataMounted();
2541 if (!mount || !mount->HasDevice()) {
2542 // We allow the wipe to continue, because if we can't mount /metadata,
2543 // it is unlikely the device would have booted anyway. If there is no
2544 // metadata partition, then the device predates Virtual A/B.
2545 return true;
2546 }
2547
2548 // Check this early, so we don't accidentally start trying to populate
2549 // the state file in recovery. Note we don't call GetUpdateState since
2550 // we want errors in acquiring the lock to be propagated, instead of
2551 // returning UpdateState::None.
2552 auto state_file = GetStateFilePath();
2553 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2554 return true;
2555 }
2556
2557 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2558 auto super_path = device_->GetSuperDevice(slot_number);
2559 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2560 LOG(ERROR) << "Unable to map partitions to complete merge.";
2561 return false;
2562 }
2563
2564 auto process_callback = [&]() -> bool {
2565 if (callback) {
2566 callback();
2567 }
2568 return true;
2569 };
2570
2571 in_factory_data_reset_ = true;
2572 bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
2573 in_factory_data_reset_ = false;
2574
2575 if (!ok) {
2576 return false;
2577 }
2578
2579 // Nothing should be depending on partitions now, so unmap them all.
2580 if (!UnmapAllPartitions()) {
2581 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2582 }
2583 return true;
2584 }
2585
FinishMergeInRecovery()2586 bool SnapshotManager::FinishMergeInRecovery() {
2587 if (!device_->IsRecovery()) {
2588 LOG(ERROR) << "Data wipes are only allowed in recovery.";
2589 return false;
2590 }
2591
2592 auto mount = EnsureMetadataMounted();
2593 if (!mount || !mount->HasDevice()) {
2594 return false;
2595 }
2596
2597 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2598 auto super_path = device_->GetSuperDevice(slot_number);
2599 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2600 LOG(ERROR) << "Unable to map partitions to complete merge.";
2601 return false;
2602 }
2603
2604 UpdateState state = ProcessUpdateState();
2605 if (state != UpdateState::MergeCompleted) {
2606 LOG(ERROR) << "Merge returned unexpected status: " << state;
2607 return false;
2608 }
2609
2610 // Nothing should be depending on partitions now, so unmap them all.
2611 if (!UnmapAllPartitions()) {
2612 LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
2613 }
2614 return true;
2615 }
2616
ProcessUpdateStateOnDataWipe(bool allow_forward_merge,const std::function<bool ()> & callback)2617 bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
2618 const std::function<bool()>& callback) {
2619 auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
2620 UpdateState state = ProcessUpdateState(callback);
2621 LOG(INFO) << "Update state in recovery: " << state;
2622 switch (state) {
2623 case UpdateState::MergeFailed:
2624 LOG(ERROR) << "Unrecoverable merge failure detected.";
2625 return false;
2626 case UpdateState::Unverified: {
2627 // If an OTA was just applied but has not yet started merging:
2628 //
2629 // - if forward merge is allowed, initiate merge and call
2630 // ProcessUpdateState again.
2631 //
2632 // - if forward merge is not allowed, we
2633 // have no choice but to revert slots, because the current slot will
2634 // immediately become unbootable. Rather than wait for the device
2635 // to reboot N times until a rollback, we proactively disable the
2636 // new slot instead.
2637 //
2638 // Since the rollback is inevitable, we don't treat a HAL failure
2639 // as an error here.
2640 auto slot = GetCurrentSlot();
2641 if (slot == Slot::Target) {
2642 if (allow_forward_merge &&
2643 access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
2644 LOG(INFO) << "Forward merge allowed, initiating merge now.";
2645 return InitiateMerge() &&
2646 ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
2647 }
2648
2649 LOG(ERROR) << "Reverting to old slot since update will be deleted.";
2650 device_->SetSlotAsUnbootable(slot_number);
2651 } else {
2652 LOG(INFO) << "Booting from " << slot << " slot, no action is taken.";
2653 }
2654 break;
2655 }
2656 case UpdateState::MergeNeedsReboot:
2657 // We shouldn't get here, because nothing is depending on
2658 // logical partitions.
2659 LOG(ERROR) << "Unexpected merge-needs-reboot state in recovery.";
2660 break;
2661 default:
2662 break;
2663 }
2664 return true;
2665 }
2666
EnsureNoOverflowSnapshot(LockedFile * lock)2667 bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
2668 CHECK(lock);
2669
2670 std::vector<std::string> snapshots;
2671 if (!ListSnapshots(lock, &snapshots)) {
2672 LOG(ERROR) << "Could not list snapshots.";
2673 return false;
2674 }
2675
2676 auto& dm = DeviceMapper::Instance();
2677 for (const auto& snapshot : snapshots) {
2678 std::vector<DeviceMapper::TargetInfo> targets;
2679 if (!dm.GetTableStatus(snapshot, &targets)) {
2680 LOG(ERROR) << "Could not read snapshot device table: " << snapshot;
2681 return false;
2682 }
2683 if (targets.size() != 1) {
2684 LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << snapshot
2685 << ", size = " << targets.size();
2686 return false;
2687 }
2688 if (targets[0].IsOverflowSnapshot()) {
2689 LOG(ERROR) << "Detected overflow in snapshot " << snapshot
2690 << ", CoW device size computation is wrong!";
2691 return false;
2692 }
2693 }
2694
2695 return true;
2696 }
2697
RecoveryCreateSnapshotDevices()2698 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices() {
2699 if (!device_->IsRecovery()) {
2700 LOG(ERROR) << __func__ << " is only allowed in recovery.";
2701 return CreateResult::NOT_CREATED;
2702 }
2703
2704 auto mount = EnsureMetadataMounted();
2705 if (!mount || !mount->HasDevice()) {
2706 LOG(ERROR) << "Couldn't mount Metadata.";
2707 return CreateResult::NOT_CREATED;
2708 }
2709 return RecoveryCreateSnapshotDevices(mount);
2710 }
2711
RecoveryCreateSnapshotDevices(const std::unique_ptr<AutoDevice> & metadata_device)2712 CreateResult SnapshotManager::RecoveryCreateSnapshotDevices(
2713 const std::unique_ptr<AutoDevice>& metadata_device) {
2714 if (!device_->IsRecovery()) {
2715 LOG(ERROR) << __func__ << " is only allowed in recovery.";
2716 return CreateResult::NOT_CREATED;
2717 }
2718
2719 if (metadata_device == nullptr || !metadata_device->HasDevice()) {
2720 LOG(ERROR) << "Metadata not mounted.";
2721 return CreateResult::NOT_CREATED;
2722 }
2723
2724 auto state_file = GetStateFilePath();
2725 if (access(state_file.c_str(), F_OK) != 0 && errno == ENOENT) {
2726 LOG(ERROR) << "Couldn't access state file.";
2727 return CreateResult::NOT_CREATED;
2728 }
2729
2730 if (!NeedSnapshotsInFirstStageMount()) {
2731 return CreateResult::NOT_CREATED;
2732 }
2733
2734 auto slot_suffix = device_->GetOtherSlotSuffix();
2735 auto slot_number = SlotNumberForSlotSuffix(slot_suffix);
2736 auto super_path = device_->GetSuperDevice(slot_number);
2737 if (!CreateLogicalAndSnapshotPartitions(super_path)) {
2738 LOG(ERROR) << "Unable to map partitions.";
2739 return CreateResult::ERROR;
2740 }
2741 return CreateResult::CREATED;
2742 }
2743
UpdateForwardMergeIndicator(bool wipe)2744 bool SnapshotManager::UpdateForwardMergeIndicator(bool wipe) {
2745 auto path = GetForwardMergeIndicatorPath();
2746
2747 if (!wipe) {
2748 LOG(INFO) << "Wipe is not scheduled. Deleting forward merge indicator.";
2749 return RemoveFileIfExists(path);
2750 }
2751
2752 // TODO(b/152094219): Don't forward merge if no CoW file is allocated.
2753
2754 LOG(INFO) << "Wipe will be scheduled. Allowing forward merge of snapshots.";
2755 if (!android::base::WriteStringToFile("1", path)) {
2756 PLOG(ERROR) << "Unable to write forward merge indicator: " << path;
2757 return false;
2758 }
2759
2760 return true;
2761 }
2762
GetMappedImageDeviceStringOrPath(const std::string & device_name,std::string * device_string_or_mapped_path)2763 bool SnapshotManager::GetMappedImageDeviceStringOrPath(const std::string& device_name,
2764 std::string* device_string_or_mapped_path) {
2765 auto& dm = DeviceMapper::Instance();
2766 // Try getting the device string if it is a device mapper device.
2767 if (dm.GetState(device_name) != DmDeviceState::INVALID) {
2768 return dm.GetDeviceString(device_name, device_string_or_mapped_path);
2769 }
2770
2771 // Otherwise, get path from IImageManager.
2772 if (!images_->GetMappedImageDevice(device_name, device_string_or_mapped_path)) {
2773 return false;
2774 }
2775
2776 LOG(WARNING) << "Calling GetMappedImageDevice with local image manager; device "
2777 << (device_string_or_mapped_path ? *device_string_or_mapped_path : "(nullptr)")
2778 << "may not be available in first stage init! ";
2779 return true;
2780 }
2781
2782 } // namespace snapshot
2783 } // namespace android
2784