• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "update_engine/aosp/cleanup_previous_update_action.h"
17 
18 #include <chrono>  // NOLINT(build/c++11) -- for merge times
19 #include <functional>
20 #include <string>
21 #include <type_traits>
22 
23 #include <android-base/chrono_utils.h>
24 #include <android-base/properties.h>
25 #include <base/bind.h>
26 
27 #ifndef __ANDROID_RECOVERY__
28 #include <statslog_ue.h>
29 #endif
30 
31 #include "update_engine/common/utils.h"
32 #include "update_engine/payload_consumer/delta_performer.h"
33 
34 using android::base::GetBoolProperty;
35 using android::snapshot::ISnapshotManager;
36 using android::snapshot::SnapshotMergeStats;
37 using android::snapshot::UpdateState;
38 using brillo::MessageLoop;
39 
40 constexpr char kBootCompletedProp[] = "sys.boot_completed";
41 // Interval to check sys.boot_completed.
42 constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
43 // Interval to check IBootControl::isSlotMarkedSuccessful
44 constexpr auto kCheckSlotMarkedSuccessfulInterval =
45     base::TimeDelta::FromSeconds(2);
46 // Interval to call SnapshotManager::ProcessUpdateState
47 constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
48 
49 #ifdef __ANDROID_RECOVERY__
50 static constexpr bool kIsRecovery = true;
51 #else
52 static constexpr bool kIsRecovery = false;
53 #endif
54 
55 namespace chromeos_update_engine {
56 
CleanupPreviousUpdateAction(PrefsInterface * prefs,BootControlInterface * boot_control,android::snapshot::ISnapshotManager * snapshot,CleanupPreviousUpdateActionDelegateInterface * delegate)57 CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
58     PrefsInterface* prefs,
59     BootControlInterface* boot_control,
60     android::snapshot::ISnapshotManager* snapshot,
61     CleanupPreviousUpdateActionDelegateInterface* delegate)
62     : prefs_(prefs),
63       boot_control_(boot_control),
64       snapshot_(snapshot),
65       delegate_(delegate),
66       running_(false),
67       cancel_failed_(false),
68       last_percentage_(0),
69       merge_stats_(nullptr) {}
70 
~CleanupPreviousUpdateAction()71 CleanupPreviousUpdateAction::~CleanupPreviousUpdateAction() {
72   StopActionInternal();
73 }
74 
PerformAction()75 void CleanupPreviousUpdateAction::PerformAction() {
76   StartActionInternal();
77 }
78 
TerminateProcessing()79 void CleanupPreviousUpdateAction::TerminateProcessing() {
80   StopActionInternal();
81 }
82 
ResumeAction()83 void CleanupPreviousUpdateAction::ResumeAction() {
84   StartActionInternal();
85 }
86 
SuspendAction()87 void CleanupPreviousUpdateAction::SuspendAction() {
88   StopActionInternal();
89 }
90 
ActionCompleted(ErrorCode error_code)91 void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
92   StopActionInternal();
93   ReportMergeStats();
94   metadata_device_ = nullptr;
95 }
96 
Type() const97 std::string CleanupPreviousUpdateAction::Type() const {
98   return StaticType();
99 }
100 
StaticType()101 std::string CleanupPreviousUpdateAction::StaticType() {
102   return "CleanupPreviousUpdateAction";
103 }
104 
105 // This function is called at the beginning of all delayed functions. By
106 // resetting |scheduled_task_|, the delayed function acknowledges that the task
107 // has already been executed, therefore there's no need to cancel it in the
108 // future. This avoids StopActionInternal() from resetting task IDs in an
109 // unexpected way because task IDs could be reused.
AcknowledgeTaskExecuted()110 void CleanupPreviousUpdateAction::AcknowledgeTaskExecuted() {
111   if (scheduled_task_ != MessageLoop::kTaskIdNull) {
112     LOG(INFO) << "Executing task " << scheduled_task_;
113   }
114   scheduled_task_ = MessageLoop::kTaskIdNull;
115 }
116 
117 // Check that scheduled_task_ is a valid task ID. Otherwise, terminate the
118 // action.
CheckTaskScheduled(std::string_view name)119 void CleanupPreviousUpdateAction::CheckTaskScheduled(std::string_view name) {
120   if (scheduled_task_ == MessageLoop::kTaskIdNull) {
121     LOG(ERROR) << "Unable to schedule " << name;
122     processor_->ActionComplete(this, ErrorCode::kError);
123   } else {
124     LOG(INFO) << "CleanupPreviousUpdateAction scheduled task ID "
125               << scheduled_task_ << " for " << name;
126   }
127 }
128 
StopActionInternal()129 void CleanupPreviousUpdateAction::StopActionInternal() {
130   LOG(INFO) << "Stopping/suspending/completing CleanupPreviousUpdateAction";
131   running_ = false;
132 
133   if (scheduled_task_ != MessageLoop::kTaskIdNull) {
134     if (MessageLoop::current()->CancelTask(scheduled_task_)) {
135       LOG(INFO) << "CleanupPreviousUpdateAction cancelled pending task ID "
136                 << scheduled_task_;
137     } else {
138       LOG(ERROR) << "CleanupPreviousUpdateAction unable to cancel task ID "
139                  << scheduled_task_;
140     }
141   }
142   scheduled_task_ = MessageLoop::kTaskIdNull;
143 }
144 
StartActionInternal()145 void CleanupPreviousUpdateAction::StartActionInternal() {
146   CHECK(prefs_);
147   CHECK(boot_control_);
148 
149   LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
150   running_ = true;
151   // Do nothing on non-VAB device.
152   if (!boot_control_->GetDynamicPartitionControl()
153            ->GetVirtualAbFeatureFlag()
154            .IsEnabled()) {
155     processor_->ActionComplete(this, ErrorCode::kSuccess);
156     return;
157   }
158   // SnapshotManager must be available on VAB devices.
159   CHECK(snapshot_ != nullptr);
160   merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
161   CHECK(merge_stats_ != nullptr);
162   WaitBootCompletedOrSchedule();
163 }
164 
ScheduleWaitBootCompleted()165 void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
166   TEST_AND_RETURN(running_);
167   scheduled_task_ = MessageLoop::current()->PostDelayedTask(
168       FROM_HERE,
169       base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
170                  base::Unretained(this)),
171       kCheckBootCompletedInterval);
172   CheckTaskScheduled("WaitBootCompleted");
173 }
174 
WaitBootCompletedOrSchedule()175 void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
176   AcknowledgeTaskExecuted();
177   TEST_AND_RETURN(running_);
178   if (!kIsRecovery &&
179       !android::base::GetBoolProperty(kBootCompletedProp, false)) {
180     // repeat
181     ScheduleWaitBootCompleted();
182     return;
183   }
184 
185   auto boot_time = std::chrono::duration_cast<std::chrono::milliseconds>(
186       android::base::boot_clock::now().time_since_epoch());
187   merge_stats_->set_boot_complete_time_ms(boot_time.count());
188 
189   LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
190   CheckSlotMarkedSuccessfulOrSchedule();
191 }
192 
ScheduleWaitMarkBootSuccessful()193 void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
194   TEST_AND_RETURN(running_);
195   scheduled_task_ = MessageLoop::current()->PostDelayedTask(
196       FROM_HERE,
197       base::Bind(
198           &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
199           base::Unretained(this)),
200       kCheckSlotMarkedSuccessfulInterval);
201   CheckTaskScheduled("WaitMarkBootSuccessful");
202 }
203 
CheckSlotMarkedSuccessfulOrSchedule()204 void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
205   AcknowledgeTaskExecuted();
206   TEST_AND_RETURN(running_);
207   if (!kIsRecovery &&
208       !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
209     ScheduleWaitMarkBootSuccessful();
210     return;
211   }
212 
213   if (metadata_device_ == nullptr) {
214     metadata_device_ = snapshot_->EnsureMetadataMounted();
215   }
216 
217   if (metadata_device_ == nullptr) {
218     LOG(ERROR) << "Failed to mount /metadata.";
219     // If metadata is erased but not formatted, it is possible to not mount
220     // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
221     processor_->ActionComplete(
222         this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
223     return;
224   }
225 
226   if (kIsRecovery) {
227     auto snapshots_created =
228         snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
229     switch (snapshots_created) {
230       case android::snapshot::CreateResult::CREATED: {
231         // If previous update has not finished merging, snapshots exists and are
232         // created here so that ProcessUpdateState can proceed.
233         LOG(INFO) << "Snapshot devices are created";
234         break;
235       }
236       case android::snapshot::CreateResult::NOT_CREATED: {
237         // If there is no previous update, no snapshot devices are created and
238         // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
239         // considered an error.
240         LOG(INFO) << "Snapshot devices are not created";
241         break;
242       }
243       case android::snapshot::CreateResult::ERROR:
244       default: {
245         LOG(ERROR)
246             << "Failed to create snapshot devices (CreateResult = "
247             << static_cast<
248                    std::underlying_type_t<android::snapshot::CreateResult>>(
249                    snapshots_created);
250         processor_->ActionComplete(this, ErrorCode::kError);
251         return;
252       }
253     }
254   }
255 
256   if (!merge_stats_->Start()) {
257     // Not an error because CleanupPreviousUpdateAction may be paused and
258     // resumed while kernel continues merging snapshots in the background.
259     LOG(WARNING) << "SnapshotMergeStats::Start failed.";
260   }
261   LOG(INFO) << "Waiting for any previous merge request to complete. "
262             << "This can take up to several minutes.";
263   WaitForMergeOrSchedule();
264 }
265 
ScheduleWaitForMerge()266 void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
267   TEST_AND_RETURN(running_);
268   scheduled_task_ = MessageLoop::current()->PostDelayedTask(
269       FROM_HERE,
270       base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
271                  base::Unretained(this)),
272       kWaitForMergeInterval);
273   CheckTaskScheduled("WaitForMerge");
274 }
275 
WaitForMergeOrSchedule()276 void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
277   AcknowledgeTaskExecuted();
278   TEST_AND_RETURN(running_);
279 
280   snapshot_->SetMergeStatsFeatures(merge_stats_);
281 
282   // Propagate the merge failure code to the merge stats. If we wait until
283   // after ProcessUpdateState, then a successful merge could overwrite the
284   // state of the previous failure.
285   auto failure_code = snapshot_->ReadMergeFailureCode();
286   if (failure_code != android::snapshot::MergeFailureCode::Ok) {
287     merge_stats_->set_merge_failure_code(failure_code);
288   }
289 
290   auto state = snapshot_->ProcessUpdateState(
291       std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
292       std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
293   merge_stats_->set_state(state);
294 
295   switch (state) {
296     case UpdateState::None: {
297       LOG(INFO) << "Can't find any snapshot to merge.";
298       ErrorCode error_code = ErrorCode::kSuccess;
299       if (!snapshot_->CancelUpdate()) {
300         error_code = ErrorCode::kError;
301         LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
302       }
303       processor_->ActionComplete(this, error_code);
304       return;
305     }
306 
307     case UpdateState::Initiated: {
308       LOG(ERROR) << "Previous update has not been completed, not cleaning up";
309       processor_->ActionComplete(this, ErrorCode::kSuccess);
310       return;
311     }
312 
313     case UpdateState::Unverified: {
314       InitiateMergeAndWait();
315       return;
316     }
317 
318     case UpdateState::Merging: {
319       ScheduleWaitForMerge();
320       return;
321     }
322 
323     case UpdateState::MergeNeedsReboot: {
324       LOG(ERROR) << "Need reboot to finish merging.";
325       processor_->ActionComplete(this, ErrorCode::kError);
326       return;
327     }
328 
329     case UpdateState::MergeCompleted: {
330       LOG(INFO) << "Merge finished with state MergeCompleted.";
331       boot_control_->MarkSlotUnbootable(1 - boot_control_->GetCurrentSlot());
332       processor_->ActionComplete(this, ErrorCode::kSuccess);
333       return;
334     }
335 
336     case UpdateState::MergeFailed: {
337       LOG(ERROR) << "Merge failed. Device may be corrupted.";
338       merge_stats_->set_merge_failure_code(snapshot_->ReadMergeFailureCode());
339       processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
340       return;
341     }
342 
343     case UpdateState::Cancelled: {
344       // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
345       // not deleted to avoid inconsistency.
346       // Nothing can be done here; just try next time.
347       ErrorCode error_code =
348           cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
349       processor_->ActionComplete(this, error_code);
350       return;
351     }
352 
353     default: {
354       // Protobuf has some reserved enum values, so a default case is needed.
355       LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
356                  << static_cast<int32_t>(state);
357     }
358   }
359 }
360 
OnMergePercentageUpdate()361 bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
362   double percentage = 0.0;
363   snapshot_->GetUpdateState(&percentage);
364   if (delegate_) {
365     // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
366     delegate_->OnCleanupProgressUpdate(percentage / 100);
367   }
368 
369   // Log if percentage increments by at least 1.
370   if (last_percentage_ < static_cast<unsigned int>(percentage)) {
371     last_percentage_ = percentage;
372     LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
373   }
374 
375   // Do not continue to wait for merge. Instead, let ProcessUpdateState
376   // return Merging directly so that we can ScheduleWaitForMerge() in
377   // MessageLoop.
378   return false;
379 }
380 
BeforeCancel()381 bool CleanupPreviousUpdateAction::BeforeCancel() {
382   if (DeltaPerformer::ResetUpdateProgress(prefs_, false /* quick */)) {
383     return true;
384   }
385 
386   // ResetUpdateProgress might not work on stub prefs. Do additional checks.
387   LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
388 
389   std::string val;
390   ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
391   if (val.empty()) {
392     LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
393               << " is empty, assuming successful cleanup";
394     return true;
395   }
396   LOG(WARNING)
397       << kPrefsDynamicPartitionMetadataUpdated << " is " << val
398       << ", not deleting snapshots even though UpdateState is Cancelled.";
399   cancel_failed_ = true;
400   return false;
401 }
402 
InitiateMergeAndWait()403 void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
404   TEST_AND_RETURN(running_);
405   LOG(INFO) << "Attempting to initiate merge.";
406   // suspend the VAB merge when running a DSU
407   if (GetBoolProperty("ro.gsid.image_running", false)) {
408     LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
409     processor_->ActionComplete(this, ErrorCode::kError);
410     return;
411   }
412 
413   snapshot_->UpdateCowStats(merge_stats_);
414 
415   auto merge_start_time = std::chrono::duration_cast<std::chrono::milliseconds>(
416       android::base::boot_clock::now().time_since_epoch());
417   merge_stats_->set_boot_complete_to_merge_start_time_ms(
418       merge_start_time.count() - merge_stats_->boot_complete_time_ms());
419 
420   auto source_build_fingerprint = snapshot_->ReadSourceBuildFingerprint();
421   merge_stats_->set_source_build_fingerprint(source_build_fingerprint);
422 
423   if (!merge_stats_->WriteState()) {
424     LOG(ERROR) << "Failed to write merge stats; record may be unreliable if "
425                   "merge is interrupted.";
426   }
427 
428   if (snapshot_->InitiateMerge()) {
429     WaitForMergeOrSchedule();
430     return;
431   }
432 
433   LOG(WARNING) << "InitiateMerge failed.";
434   auto state = snapshot_->GetUpdateState();
435   merge_stats_->set_state(state);
436   if (state == UpdateState::Unverified) {
437     // We are stuck at unverified state. This can happen if the update has
438     // been applied, but it has not even been attempted yet (in libsnapshot,
439     // rollback indicator does not exist); for example, if update_engine
440     // restarts before the device reboots, then this state may be reached.
441     // Nothing should be done here.
442     LOG(WARNING) << "InitiateMerge leaves the device at "
443                  << "UpdateState::Unverified. (Did update_engine "
444                  << "restarted?)";
445     processor_->ActionComplete(this, ErrorCode::kSuccess);
446     return;
447   }
448 
449   // State does seems to be advanced.
450   // It is possibly racy. For example, on a userdebug build, the user may
451   // manually initiate a merge with snapshotctl between last time
452   // update_engine checks UpdateState. Hence, just call
453   // WaitForMergeOrSchedule one more time.
454   LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
455                << android::snapshot::UpdateState_Name(state)
456                << ", try to wait for merge again.";
457   WaitForMergeOrSchedule();
458   return;
459 }
460 
ReportMergeStats()461 void CleanupPreviousUpdateAction::ReportMergeStats() {
462   auto result = merge_stats_->Finish();
463   if (result == nullptr) {
464     LOG(WARNING) << "Not reporting merge stats because "
465                     "SnapshotMergeStats::Finish failed.";
466     return;
467   }
468 
469 #ifdef __ANDROID_RECOVERY__
470   LOG(INFO) << "Skip reporting merge stats in recovery.";
471 #else
472   const auto& report = result->report();
473 
474   if (report.state() == UpdateState::None ||
475       report.state() == UpdateState::Initiated ||
476       report.state() == UpdateState::Unverified) {
477     LOG(INFO) << "Not reporting merge stats because state is "
478               << android::snapshot::UpdateState_Name(report.state());
479     return;
480   }
481 
482   auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
483       result->merge_time());
484 
485   bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
486                           ->GetVirtualAbFeatureFlag()
487                           .IsRetrofit();
488   bool vab_compression_enabled = boot_control_->GetDynamicPartitionControl()
489                                      ->GetVirtualAbCompressionFeatureFlag()
490                                      .IsEnabled();
491   // The snapshot has been merged, so we can no longer call
492   // DynamicPartitionControlInterface::UpdateUsesSnapshotCompression.
493   // However, we have saved the flag in the snapshot report.
494   bool vab_compression_used = report.compression_enabled();
495   bool userspace_snapshots_enabled =
496       boot_control_->GetDynamicPartitionControl()
497           ->GetVirtualAbUserspaceSnapshotsFeatureFlag()
498           .IsEnabled();
499   bool userspace_snapshots_used = report.userspace_snapshots_used();
500   bool xor_compression_enabled = boot_control_->GetDynamicPartitionControl()
501                                      ->GetVirtualAbCompressionXorFeatureFlag()
502                                      .IsEnabled();
503   bool xor_compression_used = report.xor_compression_used();
504   bool iouring_used = report.iouring_used();
505 
506   auto target_build_fingerprint =
507       android::base::GetProperty("ro.build.fingerprint", "");
508 
509   LOG(INFO) << "Reporting merge stats: "
510             << android::snapshot::UpdateState_Name(report.state()) << " in "
511             << passed_ms.count() << "ms (resumed " << report.resume_count()
512             << " times), using " << report.cow_file_size()
513             << " bytes of COW image.";
514   statsd::stats_write(statsd::SNAPSHOT_MERGE_REPORTED,
515                       static_cast<int32_t>(report.state()),
516                       static_cast<int64_t>(passed_ms.count()),
517                       static_cast<int32_t>(report.resume_count()),
518                       vab_retrofit,
519                       static_cast<int64_t>(report.cow_file_size()),
520                       vab_compression_enabled,
521                       vab_compression_used,
522                       report.total_cow_size_bytes(),
523                       report.estimated_cow_size_bytes(),
524                       report.boot_complete_time_ms(),
525                       report.boot_complete_to_merge_start_time_ms(),
526                       static_cast<int32_t>(report.merge_failure_code()),
527                       report.source_build_fingerprint().c_str(),
528                       target_build_fingerprint.c_str(),
529                       userspace_snapshots_enabled,
530                       userspace_snapshots_used,
531                       xor_compression_enabled,
532                       xor_compression_used,
533                       iouring_used);
534 #endif
535 }
536 
537 }  // namespace chromeos_update_engine
538