• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "update_engine/cleanup_previous_update_action.h"
17 
18 #include <chrono>  // NOLINT(build/c++11) -- for merge times
19 #include <functional>
20 #include <string>
21 #include <type_traits>
22 
23 #include <android-base/properties.h>
24 #include <base/bind.h>
25 
26 #ifndef __ANDROID_RECOVERY__
27 #include <statslog.h>
28 #endif
29 
30 #include "update_engine/common/utils.h"
31 #include "update_engine/payload_consumer/delta_performer.h"
32 
33 using android::base::GetBoolProperty;
34 using android::snapshot::SnapshotManager;
35 using android::snapshot::SnapshotMergeStats;
36 using android::snapshot::UpdateState;
37 using brillo::MessageLoop;
38 
39 constexpr char kBootCompletedProp[] = "sys.boot_completed";
40 // Interval to check sys.boot_completed.
41 constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42 // Interval to check IBootControl::isSlotMarkedSuccessful
43 constexpr auto kCheckSlotMarkedSuccessfulInterval =
44     base::TimeDelta::FromSeconds(2);
45 // Interval to call SnapshotManager::ProcessUpdateState
46 constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47 
48 #ifdef __ANDROID_RECOVERY__
49 static constexpr bool kIsRecovery = true;
50 #else
51 static constexpr bool kIsRecovery = false;
52 #endif
53 
54 namespace chromeos_update_engine {
55 
CleanupPreviousUpdateAction(PrefsInterface * prefs,BootControlInterface * boot_control,android::snapshot::SnapshotManager * snapshot,CleanupPreviousUpdateActionDelegateInterface * delegate)56 CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57     PrefsInterface* prefs,
58     BootControlInterface* boot_control,
59     android::snapshot::SnapshotManager* snapshot,
60     CleanupPreviousUpdateActionDelegateInterface* delegate)
61     : prefs_(prefs),
62       boot_control_(boot_control),
63       snapshot_(snapshot),
64       delegate_(delegate),
65       running_(false),
66       cancel_failed_(false),
67       last_percentage_(0),
68       merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
69 
PerformAction()70 void CleanupPreviousUpdateAction::PerformAction() {
71   ResumeAction();
72 }
73 
TerminateProcessing()74 void CleanupPreviousUpdateAction::TerminateProcessing() {
75   SuspendAction();
76 }
77 
ResumeAction()78 void CleanupPreviousUpdateAction::ResumeAction() {
79   CHECK(prefs_);
80   CHECK(boot_control_);
81 
82   LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
83   running_ = true;
84   StartActionInternal();
85 }
86 
SuspendAction()87 void CleanupPreviousUpdateAction::SuspendAction() {
88   LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
89   running_ = false;
90 }
91 
ActionCompleted(ErrorCode error_code)92 void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
93   running_ = false;
94   ReportMergeStats();
95   metadata_device_ = nullptr;
96 }
97 
Type() const98 std::string CleanupPreviousUpdateAction::Type() const {
99   return StaticType();
100 }
101 
StaticType()102 std::string CleanupPreviousUpdateAction::StaticType() {
103   return "CleanupPreviousUpdateAction";
104 }
105 
StartActionInternal()106 void CleanupPreviousUpdateAction::StartActionInternal() {
107   // Do nothing on non-VAB device.
108   if (!boot_control_->GetDynamicPartitionControl()
109            ->GetVirtualAbFeatureFlag()
110            .IsEnabled()) {
111     processor_->ActionComplete(this, ErrorCode::kSuccess);
112     return;
113   }
114   // SnapshotManager is only available on VAB devices.
115   CHECK(snapshot_);
116   WaitBootCompletedOrSchedule();
117 }
118 
ScheduleWaitBootCompleted()119 void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
120   TEST_AND_RETURN(running_);
121   MessageLoop::current()->PostDelayedTask(
122       FROM_HERE,
123       base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
124                  base::Unretained(this)),
125       kCheckBootCompletedInterval);
126 }
127 
WaitBootCompletedOrSchedule()128 void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
129   TEST_AND_RETURN(running_);
130   if (!kIsRecovery &&
131       !android::base::GetBoolProperty(kBootCompletedProp, false)) {
132     // repeat
133     ScheduleWaitBootCompleted();
134     return;
135   }
136 
137   LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
138   CheckSlotMarkedSuccessfulOrSchedule();
139 }
140 
ScheduleWaitMarkBootSuccessful()141 void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
142   TEST_AND_RETURN(running_);
143   MessageLoop::current()->PostDelayedTask(
144       FROM_HERE,
145       base::Bind(
146           &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
147           base::Unretained(this)),
148       kCheckSlotMarkedSuccessfulInterval);
149 }
150 
CheckSlotMarkedSuccessfulOrSchedule()151 void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
152   TEST_AND_RETURN(running_);
153   if (!kIsRecovery &&
154       !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
155     ScheduleWaitMarkBootSuccessful();
156     return;
157   }
158 
159   if (metadata_device_ == nullptr) {
160     metadata_device_ = snapshot_->EnsureMetadataMounted();
161   }
162 
163   if (metadata_device_ == nullptr) {
164     LOG(ERROR) << "Failed to mount /metadata.";
165     // If metadata is erased but not formatted, it is possible to not mount
166     // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
167     processor_->ActionComplete(
168         this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
169     return;
170   }
171 
172   if (kIsRecovery) {
173     auto snapshots_created =
174         snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
175     switch (snapshots_created) {
176       case android::snapshot::CreateResult::CREATED: {
177         // If previous update has not finished merging, snapshots exists and are
178         // created here so that ProcessUpdateState can proceed.
179         LOG(INFO) << "Snapshot devices are created";
180         break;
181       }
182       case android::snapshot::CreateResult::NOT_CREATED: {
183         // If there is no previous update, no snapshot devices are created and
184         // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
185         // considered an error.
186         LOG(INFO) << "Snapshot devices are not created";
187         break;
188       }
189       case android::snapshot::CreateResult::ERROR:
190       default: {
191         LOG(ERROR)
192             << "Failed to create snapshot devices (CreateResult = "
193             << static_cast<
194                    std::underlying_type_t<android::snapshot::CreateResult>>(
195                    snapshots_created);
196         processor_->ActionComplete(this, ErrorCode::kError);
197         return;
198       }
199     }
200   }
201 
202   if (!merge_stats_->Start()) {
203     // Not an error because CleanupPreviousUpdateAction may be paused and
204     // resumed while kernel continues merging snapshots in the background.
205     LOG(WARNING) << "SnapshotMergeStats::Start failed.";
206   }
207   LOG(INFO) << "Waiting for any previous merge request to complete. "
208             << "This can take up to several minutes.";
209   WaitForMergeOrSchedule();
210 }
211 
ScheduleWaitForMerge()212 void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
213   TEST_AND_RETURN(running_);
214   MessageLoop::current()->PostDelayedTask(
215       FROM_HERE,
216       base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
217                  base::Unretained(this)),
218       kWaitForMergeInterval);
219 }
220 
WaitForMergeOrSchedule()221 void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
222   TEST_AND_RETURN(running_);
223   auto state = snapshot_->ProcessUpdateState(
224       std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
225       std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
226   merge_stats_->set_state(state);
227 
228   switch (state) {
229     case UpdateState::None: {
230       LOG(INFO) << "Can't find any snapshot to merge.";
231       ErrorCode error_code = ErrorCode::kSuccess;
232       if (!snapshot_->CancelUpdate()) {
233         error_code = ErrorCode::kError;
234         LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
235       }
236       processor_->ActionComplete(this, error_code);
237       return;
238     }
239 
240     case UpdateState::Initiated: {
241       LOG(ERROR) << "Previous update has not been completed, not cleaning up";
242       processor_->ActionComplete(this, ErrorCode::kSuccess);
243       return;
244     }
245 
246     case UpdateState::Unverified: {
247       InitiateMergeAndWait();
248       return;
249     }
250 
251     case UpdateState::Merging: {
252       ScheduleWaitForMerge();
253       return;
254     }
255 
256     case UpdateState::MergeNeedsReboot: {
257       LOG(ERROR) << "Need reboot to finish merging.";
258       processor_->ActionComplete(this, ErrorCode::kError);
259       return;
260     }
261 
262     case UpdateState::MergeCompleted: {
263       LOG(INFO) << "Merge finished with state MergeCompleted.";
264       processor_->ActionComplete(this, ErrorCode::kSuccess);
265       return;
266     }
267 
268     case UpdateState::MergeFailed: {
269       LOG(ERROR) << "Merge failed. Device may be corrupted.";
270       processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
271       return;
272     }
273 
274     case UpdateState::Cancelled: {
275       // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
276       // not deleted to avoid inconsistency.
277       // Nothing can be done here; just try next time.
278       ErrorCode error_code =
279           cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
280       processor_->ActionComplete(this, error_code);
281       return;
282     }
283 
284     default: {
285       // Protobuf has some reserved enum values, so a default case is needed.
286       LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
287                  << static_cast<int32_t>(state);
288     }
289   }
290 }
291 
OnMergePercentageUpdate()292 bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
293   double percentage = 0.0;
294   snapshot_->GetUpdateState(&percentage);
295   if (delegate_) {
296     // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
297     delegate_->OnCleanupProgressUpdate(percentage / 100);
298   }
299 
300   // Log if percentage increments by at least 1.
301   if (last_percentage_ < static_cast<unsigned int>(percentage)) {
302     last_percentage_ = percentage;
303     LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
304   }
305 
306   // Do not continue to wait for merge. Instead, let ProcessUpdateState
307   // return Merging directly so that we can ScheduleWaitForMerge() in
308   // MessageLoop.
309   return false;
310 }
311 
BeforeCancel()312 bool CleanupPreviousUpdateAction::BeforeCancel() {
313   if (DeltaPerformer::ResetUpdateProgress(
314           prefs_,
315           false /* quick */,
316           false /* skip dynamic partitions metadata*/)) {
317     return true;
318   }
319 
320   // ResetUpdateProgress might not work on stub prefs. Do additional checks.
321   LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
322 
323   std::string val;
324   ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
325   if (val.empty()) {
326     LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
327               << " is empty, assuming successful cleanup";
328     return true;
329   }
330   LOG(WARNING)
331       << kPrefsDynamicPartitionMetadataUpdated << " is " << val
332       << ", not deleting snapshots even though UpdateState is Cancelled.";
333   cancel_failed_ = true;
334   return false;
335 }
336 
InitiateMergeAndWait()337 void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
338   TEST_AND_RETURN(running_);
339   LOG(INFO) << "Attempting to initiate merge.";
340   // suspend the VAB merge when running a DSU
341   if (GetBoolProperty("ro.gsid.image_running", false)) {
342     LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
343     processor_->ActionComplete(this, ErrorCode::kError);
344     return;
345   }
346 
347   uint64_t cow_file_size;
348   if (snapshot_->InitiateMerge(&cow_file_size)) {
349     merge_stats_->set_cow_file_size(cow_file_size);
350     WaitForMergeOrSchedule();
351     return;
352   }
353 
354   LOG(WARNING) << "InitiateMerge failed.";
355   auto state = snapshot_->GetUpdateState();
356   merge_stats_->set_state(state);
357   if (state == UpdateState::Unverified) {
358     // We are stuck at unverified state. This can happen if the update has
359     // been applied, but it has not even been attempted yet (in libsnapshot,
360     // rollback indicator does not exist); for example, if update_engine
361     // restarts before the device reboots, then this state may be reached.
362     // Nothing should be done here.
363     LOG(WARNING) << "InitiateMerge leaves the device at "
364                  << "UpdateState::Unverified. (Did update_engine "
365                  << "restarted?)";
366     processor_->ActionComplete(this, ErrorCode::kSuccess);
367     return;
368   }
369 
370   // State does seems to be advanced.
371   // It is possibly racy. For example, on a userdebug build, the user may
372   // manually initiate a merge with snapshotctl between last time
373   // update_engine checks UpdateState. Hence, just call
374   // WaitForMergeOrSchedule one more time.
375   LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
376                << android::snapshot::UpdateState_Name(state)
377                << ", try to wait for merge again.";
378   WaitForMergeOrSchedule();
379   return;
380 }
381 
ReportMergeStats()382 void CleanupPreviousUpdateAction::ReportMergeStats() {
383   auto result = merge_stats_->Finish();
384   if (result == nullptr) {
385     LOG(WARNING) << "Not reporting merge stats because "
386                     "SnapshotMergeStats::Finish failed.";
387     return;
388   }
389 
390 #ifdef __ANDROID_RECOVERY__
391   LOG(INFO) << "Skip reporting merge stats in recovery.";
392 #else
393   const auto& report = result->report();
394 
395   if (report.state() == UpdateState::None ||
396       report.state() == UpdateState::Initiated ||
397       report.state() == UpdateState::Unverified) {
398     LOG(INFO) << "Not reporting merge stats because state is "
399               << android::snapshot::UpdateState_Name(report.state());
400     return;
401   }
402 
403   auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
404       result->merge_time());
405 
406   bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
407                           ->GetVirtualAbFeatureFlag()
408                           .IsRetrofit();
409 
410   LOG(INFO) << "Reporting merge stats: "
411             << android::snapshot::UpdateState_Name(report.state()) << " in "
412             << passed_ms.count() << "ms (resumed " << report.resume_count()
413             << " times), using " << report.cow_file_size()
414             << " bytes of COW image.";
415   android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
416                              static_cast<int32_t>(report.state()),
417                              static_cast<int64_t>(passed_ms.count()),
418                              static_cast<int32_t>(report.resume_count()),
419                              vab_retrofit,
420                              static_cast<int64_t>(report.cow_file_size()));
421 #endif
422 }
423 
424 }  // namespace chromeos_update_engine
425