1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "update_engine/aosp/cleanup_previous_update_action.h"
17
18 #include <chrono> // NOLINT(build/c++11) -- for merge times
19 #include <functional>
20 #include <string>
21 #include <type_traits>
22
23 #include <android-base/chrono_utils.h>
24 #include <android-base/properties.h>
25 #include <base/bind.h>
26
27 #ifndef __ANDROID_RECOVERY__
28 #include <statslog_ue.h>
29 #endif
30
31 #include "update_engine/common/utils.h"
32 #include "update_engine/payload_consumer/delta_performer.h"
33
34 using android::base::GetBoolProperty;
35 using android::snapshot::ISnapshotManager;
36 using android::snapshot::SnapshotMergeStats;
37 using android::snapshot::UpdateState;
38 using brillo::MessageLoop;
39
40 constexpr char kBootCompletedProp[] = "sys.boot_completed";
41 // Interval to check sys.boot_completed.
42 constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
43 // Interval to check IBootControl::isSlotMarkedSuccessful
44 constexpr auto kCheckSlotMarkedSuccessfulInterval =
45 base::TimeDelta::FromSeconds(2);
46 // Interval to call SnapshotManager::ProcessUpdateState
47 constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
48
49 #ifdef __ANDROID_RECOVERY__
50 static constexpr bool kIsRecovery = true;
51 #else
52 static constexpr bool kIsRecovery = false;
53 #endif
54
55 namespace chromeos_update_engine {
56
CleanupPreviousUpdateAction(PrefsInterface * prefs,BootControlInterface * boot_control,android::snapshot::ISnapshotManager * snapshot,CleanupPreviousUpdateActionDelegateInterface * delegate)57 CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
58 PrefsInterface* prefs,
59 BootControlInterface* boot_control,
60 android::snapshot::ISnapshotManager* snapshot,
61 CleanupPreviousUpdateActionDelegateInterface* delegate)
62 : prefs_(prefs),
63 boot_control_(boot_control),
64 snapshot_(snapshot),
65 delegate_(delegate),
66 running_(false),
67 cancel_failed_(false),
68 last_percentage_(0),
69 merge_stats_(nullptr) {}
70
~CleanupPreviousUpdateAction()71 CleanupPreviousUpdateAction::~CleanupPreviousUpdateAction() {
72 StopActionInternal();
73 }
74
PerformAction()75 void CleanupPreviousUpdateAction::PerformAction() {
76 StartActionInternal();
77 }
78
TerminateProcessing()79 void CleanupPreviousUpdateAction::TerminateProcessing() {
80 StopActionInternal();
81 }
82
ResumeAction()83 void CleanupPreviousUpdateAction::ResumeAction() {
84 StartActionInternal();
85 }
86
SuspendAction()87 void CleanupPreviousUpdateAction::SuspendAction() {
88 StopActionInternal();
89 }
90
ActionCompleted(ErrorCode error_code)91 void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
92 StopActionInternal();
93 ReportMergeStats();
94 metadata_device_ = nullptr;
95 }
96
Type() const97 std::string CleanupPreviousUpdateAction::Type() const {
98 return StaticType();
99 }
100
StaticType()101 std::string CleanupPreviousUpdateAction::StaticType() {
102 return "CleanupPreviousUpdateAction";
103 }
104
105 // This function is called at the beginning of all delayed functions. By
106 // resetting |scheduled_task_|, the delayed function acknowledges that the task
107 // has already been executed, therefore there's no need to cancel it in the
108 // future. This avoids StopActionInternal() from resetting task IDs in an
109 // unexpected way because task IDs could be reused.
AcknowledgeTaskExecuted()110 void CleanupPreviousUpdateAction::AcknowledgeTaskExecuted() {
111 if (scheduled_task_ != MessageLoop::kTaskIdNull) {
112 LOG(INFO) << "Executing task " << scheduled_task_;
113 }
114 scheduled_task_ = MessageLoop::kTaskIdNull;
115 }
116
117 // Check that scheduled_task_ is a valid task ID. Otherwise, terminate the
118 // action.
CheckTaskScheduled(std::string_view name)119 void CleanupPreviousUpdateAction::CheckTaskScheduled(std::string_view name) {
120 if (scheduled_task_ == MessageLoop::kTaskIdNull) {
121 LOG(ERROR) << "Unable to schedule " << name;
122 processor_->ActionComplete(this, ErrorCode::kError);
123 } else {
124 LOG(INFO) << "CleanupPreviousUpdateAction scheduled task ID "
125 << scheduled_task_ << " for " << name;
126 }
127 }
128
StopActionInternal()129 void CleanupPreviousUpdateAction::StopActionInternal() {
130 LOG(INFO) << "Stopping/suspending/completing CleanupPreviousUpdateAction";
131 running_ = false;
132
133 if (scheduled_task_ != MessageLoop::kTaskIdNull) {
134 if (MessageLoop::current()->CancelTask(scheduled_task_)) {
135 LOG(INFO) << "CleanupPreviousUpdateAction cancelled pending task ID "
136 << scheduled_task_;
137 } else {
138 LOG(ERROR) << "CleanupPreviousUpdateAction unable to cancel task ID "
139 << scheduled_task_;
140 }
141 }
142 scheduled_task_ = MessageLoop::kTaskIdNull;
143 }
144
StartActionInternal()145 void CleanupPreviousUpdateAction::StartActionInternal() {
146 CHECK(prefs_);
147 CHECK(boot_control_);
148
149 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
150 running_ = true;
151 // Do nothing on non-VAB device.
152 if (!boot_control_->GetDynamicPartitionControl()
153 ->GetVirtualAbFeatureFlag()
154 .IsEnabled()) {
155 processor_->ActionComplete(this, ErrorCode::kSuccess);
156 return;
157 }
158 // SnapshotManager must be available on VAB devices.
159 CHECK(snapshot_ != nullptr);
160 merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
161 CHECK(merge_stats_ != nullptr);
162 WaitBootCompletedOrSchedule();
163 }
164
ScheduleWaitBootCompleted()165 void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
166 TEST_AND_RETURN(running_);
167 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
168 FROM_HERE,
169 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
170 base::Unretained(this)),
171 kCheckBootCompletedInterval);
172 CheckTaskScheduled("WaitBootCompleted");
173 }
174
WaitBootCompletedOrSchedule()175 void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
176 AcknowledgeTaskExecuted();
177 TEST_AND_RETURN(running_);
178 if (!kIsRecovery &&
179 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
180 // repeat
181 ScheduleWaitBootCompleted();
182 return;
183 }
184
185 auto boot_time = std::chrono::duration_cast<std::chrono::milliseconds>(
186 android::base::boot_clock::now().time_since_epoch());
187 merge_stats_->set_boot_complete_time_ms(boot_time.count());
188
189 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
190 CheckSlotMarkedSuccessfulOrSchedule();
191 }
192
ScheduleWaitMarkBootSuccessful()193 void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
194 TEST_AND_RETURN(running_);
195 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
196 FROM_HERE,
197 base::Bind(
198 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
199 base::Unretained(this)),
200 kCheckSlotMarkedSuccessfulInterval);
201 CheckTaskScheduled("WaitMarkBootSuccessful");
202 }
203
CheckSlotMarkedSuccessfulOrSchedule()204 void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
205 AcknowledgeTaskExecuted();
206 TEST_AND_RETURN(running_);
207 if (!kIsRecovery &&
208 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
209 ScheduleWaitMarkBootSuccessful();
210 return;
211 }
212
213 if (metadata_device_ == nullptr) {
214 metadata_device_ = snapshot_->EnsureMetadataMounted();
215 }
216
217 if (metadata_device_ == nullptr) {
218 LOG(ERROR) << "Failed to mount /metadata.";
219 // If metadata is erased but not formatted, it is possible to not mount
220 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
221 processor_->ActionComplete(
222 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
223 return;
224 }
225
226 if (kIsRecovery) {
227 auto snapshots_created =
228 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
229 switch (snapshots_created) {
230 case android::snapshot::CreateResult::CREATED: {
231 // If previous update has not finished merging, snapshots exists and are
232 // created here so that ProcessUpdateState can proceed.
233 LOG(INFO) << "Snapshot devices are created";
234 break;
235 }
236 case android::snapshot::CreateResult::NOT_CREATED: {
237 // If there is no previous update, no snapshot devices are created and
238 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
239 // considered an error.
240 LOG(INFO) << "Snapshot devices are not created";
241 break;
242 }
243 case android::snapshot::CreateResult::ERROR:
244 default: {
245 LOG(ERROR)
246 << "Failed to create snapshot devices (CreateResult = "
247 << static_cast<
248 std::underlying_type_t<android::snapshot::CreateResult>>(
249 snapshots_created);
250 processor_->ActionComplete(this, ErrorCode::kError);
251 return;
252 }
253 }
254 }
255
256 if (!merge_stats_->Start()) {
257 // Not an error because CleanupPreviousUpdateAction may be paused and
258 // resumed while kernel continues merging snapshots in the background.
259 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
260 }
261 LOG(INFO) << "Waiting for any previous merge request to complete. "
262 << "This can take up to several minutes.";
263 WaitForMergeOrSchedule();
264 }
265
ScheduleWaitForMerge()266 void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
267 TEST_AND_RETURN(running_);
268 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
269 FROM_HERE,
270 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
271 base::Unretained(this)),
272 kWaitForMergeInterval);
273 CheckTaskScheduled("WaitForMerge");
274 }
275
WaitForMergeOrSchedule()276 void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
277 AcknowledgeTaskExecuted();
278 TEST_AND_RETURN(running_);
279
280 snapshot_->SetMergeStatsFeatures(merge_stats_);
281
282 // Propagate the merge failure code to the merge stats. If we wait until
283 // after ProcessUpdateState, then a successful merge could overwrite the
284 // state of the previous failure.
285 auto failure_code = snapshot_->ReadMergeFailureCode();
286 if (failure_code != android::snapshot::MergeFailureCode::Ok) {
287 merge_stats_->set_merge_failure_code(failure_code);
288 }
289
290 auto state = snapshot_->ProcessUpdateState(
291 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
292 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
293 merge_stats_->set_state(state);
294
295 switch (state) {
296 case UpdateState::None: {
297 LOG(INFO) << "Can't find any snapshot to merge.";
298 ErrorCode error_code = ErrorCode::kSuccess;
299 if (!snapshot_->CancelUpdate()) {
300 error_code = ErrorCode::kError;
301 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
302 }
303 processor_->ActionComplete(this, error_code);
304 return;
305 }
306
307 case UpdateState::Initiated: {
308 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
309 processor_->ActionComplete(this, ErrorCode::kSuccess);
310 return;
311 }
312
313 case UpdateState::Unverified: {
314 InitiateMergeAndWait();
315 return;
316 }
317
318 case UpdateState::Merging: {
319 ScheduleWaitForMerge();
320 return;
321 }
322
323 case UpdateState::MergeNeedsReboot: {
324 LOG(ERROR) << "Need reboot to finish merging.";
325 processor_->ActionComplete(this, ErrorCode::kError);
326 return;
327 }
328
329 case UpdateState::MergeCompleted: {
330 LOG(INFO) << "Merge finished with state MergeCompleted.";
331 boot_control_->MarkSlotUnbootable(1 - boot_control_->GetCurrentSlot());
332 processor_->ActionComplete(this, ErrorCode::kSuccess);
333 return;
334 }
335
336 case UpdateState::MergeFailed: {
337 LOG(ERROR) << "Merge failed. Device may be corrupted.";
338 merge_stats_->set_merge_failure_code(snapshot_->ReadMergeFailureCode());
339 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
340 return;
341 }
342
343 case UpdateState::Cancelled: {
344 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
345 // not deleted to avoid inconsistency.
346 // Nothing can be done here; just try next time.
347 ErrorCode error_code =
348 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
349 processor_->ActionComplete(this, error_code);
350 return;
351 }
352
353 default: {
354 // Protobuf has some reserved enum values, so a default case is needed.
355 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
356 << static_cast<int32_t>(state);
357 }
358 }
359 }
360
OnMergePercentageUpdate()361 bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
362 double percentage = 0.0;
363 snapshot_->GetUpdateState(&percentage);
364 if (delegate_) {
365 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
366 delegate_->OnCleanupProgressUpdate(percentage / 100);
367 }
368
369 // Log if percentage increments by at least 1.
370 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
371 last_percentage_ = percentage;
372 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
373 }
374
375 // Do not continue to wait for merge. Instead, let ProcessUpdateState
376 // return Merging directly so that we can ScheduleWaitForMerge() in
377 // MessageLoop.
378 return false;
379 }
380
BeforeCancel()381 bool CleanupPreviousUpdateAction::BeforeCancel() {
382 if (DeltaPerformer::ResetUpdateProgress(prefs_, false /* quick */)) {
383 return true;
384 }
385
386 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
387 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
388
389 std::string val;
390 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
391 if (val.empty()) {
392 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
393 << " is empty, assuming successful cleanup";
394 return true;
395 }
396 LOG(WARNING)
397 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
398 << ", not deleting snapshots even though UpdateState is Cancelled.";
399 cancel_failed_ = true;
400 return false;
401 }
402
InitiateMergeAndWait()403 void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
404 TEST_AND_RETURN(running_);
405 LOG(INFO) << "Attempting to initiate merge.";
406 // suspend the VAB merge when running a DSU
407 if (GetBoolProperty("ro.gsid.image_running", false)) {
408 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
409 processor_->ActionComplete(this, ErrorCode::kError);
410 return;
411 }
412
413 snapshot_->UpdateCowStats(merge_stats_);
414
415 auto merge_start_time = std::chrono::duration_cast<std::chrono::milliseconds>(
416 android::base::boot_clock::now().time_since_epoch());
417 merge_stats_->set_boot_complete_to_merge_start_time_ms(
418 merge_start_time.count() - merge_stats_->boot_complete_time_ms());
419
420 auto source_build_fingerprint = snapshot_->ReadSourceBuildFingerprint();
421 merge_stats_->set_source_build_fingerprint(source_build_fingerprint);
422
423 if (!merge_stats_->WriteState()) {
424 LOG(ERROR) << "Failed to write merge stats; record may be unreliable if "
425 "merge is interrupted.";
426 }
427
428 if (snapshot_->InitiateMerge()) {
429 WaitForMergeOrSchedule();
430 return;
431 }
432
433 LOG(WARNING) << "InitiateMerge failed.";
434 auto state = snapshot_->GetUpdateState();
435 merge_stats_->set_state(state);
436 if (state == UpdateState::Unverified) {
437 // We are stuck at unverified state. This can happen if the update has
438 // been applied, but it has not even been attempted yet (in libsnapshot,
439 // rollback indicator does not exist); for example, if update_engine
440 // restarts before the device reboots, then this state may be reached.
441 // Nothing should be done here.
442 LOG(WARNING) << "InitiateMerge leaves the device at "
443 << "UpdateState::Unverified. (Did update_engine "
444 << "restarted?)";
445 processor_->ActionComplete(this, ErrorCode::kSuccess);
446 return;
447 }
448
449 // State does seems to be advanced.
450 // It is possibly racy. For example, on a userdebug build, the user may
451 // manually initiate a merge with snapshotctl between last time
452 // update_engine checks UpdateState. Hence, just call
453 // WaitForMergeOrSchedule one more time.
454 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
455 << android::snapshot::UpdateState_Name(state)
456 << ", try to wait for merge again.";
457 WaitForMergeOrSchedule();
458 return;
459 }
460
ReportMergeStats()461 void CleanupPreviousUpdateAction::ReportMergeStats() {
462 auto result = merge_stats_->Finish();
463 if (result == nullptr) {
464 LOG(WARNING) << "Not reporting merge stats because "
465 "SnapshotMergeStats::Finish failed.";
466 return;
467 }
468
469 #ifdef __ANDROID_RECOVERY__
470 LOG(INFO) << "Skip reporting merge stats in recovery.";
471 #else
472 const auto& report = result->report();
473
474 if (report.state() == UpdateState::None ||
475 report.state() == UpdateState::Initiated ||
476 report.state() == UpdateState::Unverified) {
477 LOG(INFO) << "Not reporting merge stats because state is "
478 << android::snapshot::UpdateState_Name(report.state());
479 return;
480 }
481
482 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
483 result->merge_time());
484
485 bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
486 ->GetVirtualAbFeatureFlag()
487 .IsRetrofit();
488 bool vab_compression_enabled = boot_control_->GetDynamicPartitionControl()
489 ->GetVirtualAbCompressionFeatureFlag()
490 .IsEnabled();
491 // The snapshot has been merged, so we can no longer call
492 // DynamicPartitionControlInterface::UpdateUsesSnapshotCompression.
493 // However, we have saved the flag in the snapshot report.
494 bool vab_compression_used = report.compression_enabled();
495 bool userspace_snapshots_enabled =
496 boot_control_->GetDynamicPartitionControl()
497 ->GetVirtualAbUserspaceSnapshotsFeatureFlag()
498 .IsEnabled();
499 bool userspace_snapshots_used = report.userspace_snapshots_used();
500 bool xor_compression_enabled = boot_control_->GetDynamicPartitionControl()
501 ->GetVirtualAbCompressionXorFeatureFlag()
502 .IsEnabled();
503 bool xor_compression_used = report.xor_compression_used();
504 bool iouring_used = report.iouring_used();
505
506 auto target_build_fingerprint =
507 android::base::GetProperty("ro.build.fingerprint", "");
508
509 LOG(INFO) << "Reporting merge stats: "
510 << android::snapshot::UpdateState_Name(report.state()) << " in "
511 << passed_ms.count() << "ms (resumed " << report.resume_count()
512 << " times), using " << report.cow_file_size()
513 << " bytes of COW image.";
514 statsd::stats_write(statsd::SNAPSHOT_MERGE_REPORTED,
515 static_cast<int32_t>(report.state()),
516 static_cast<int64_t>(passed_ms.count()),
517 static_cast<int32_t>(report.resume_count()),
518 vab_retrofit,
519 static_cast<int64_t>(report.cow_file_size()),
520 vab_compression_enabled,
521 vab_compression_used,
522 report.total_cow_size_bytes(),
523 report.estimated_cow_size_bytes(),
524 report.boot_complete_time_ms(),
525 report.boot_complete_to_merge_start_time_ms(),
526 static_cast<int32_t>(report.merge_failure_code()),
527 report.source_build_fingerprint().c_str(),
528 target_build_fingerprint.c_str(),
529 userspace_snapshots_enabled,
530 userspace_snapshots_used,
531 xor_compression_enabled,
532 xor_compression_used,
533 iouring_used);
534 #endif
535 }
536
537 } // namespace chromeos_update_engine
538