• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "watchdog.h"
18 
19 #include <chrono>
20 #include <cstdint>
21 #include <mutex>
22 #include <optional>
23 #include <thread>
24 #include <vector>
25 
26 #include <android-base/logging.h>
27 
// Bookkeeping for a single watched storage request: which request it is, when
// it started, the most recent progress label, and whether a timeout has
// already been reported for it.
struct watcher {
    // Captures cmd/op_id/flags from |request| and records the start time.
    // |id| and later state labels are stored as raw pointers (not copied), so
    // the strings must outlive the watcher.
    watcher(const char* id, const struct storage_msg* request);

    // Records |new_state| as the current progress label and timestamps it.
    void SetState(const char* new_state);

    // Logs a timeout report; the request-identifying header line is emitted
    // only the first time this is called.
    void LogTimeout();

    // Logs total elapsed time, but only if a timeout was previously reported.
    void LogFinished();

    const char* id_;     // caller-supplied label for the watched operation
    uint32_t cmd_;       // copied from the request
    uint32_t op_id_;     // copied from the request
    uint32_t flags_;     // copied from the request
    const char* state_;  // latest progress label, or nullptr if none was set

    // Intervals are measured with a monotonic clock. high_resolution_clock is
    // allowed to alias system_clock, in which case wall-clock adjustments
    // would corrupt the elapsed times that get logged.
    using clock = std::chrono::steady_clock;
    clock::time_point start_;         // when the watcher was created
    clock::time_point state_change_;  // when state_ last changed (initially start_)

    // Milliseconds from start_ to |end|.
    std::chrono::milliseconds Elapsed(clock::time_point end);

    bool triggered_;  // true once LogTimeout() has emitted its header line
};
47 
watcher(const char * id,const struct storage_msg * request)48 watcher::watcher(const char* id, const struct storage_msg* request)
49     : id_(id), state_(nullptr), triggered_(false) {
50     cmd_ = request->cmd;
51     op_id_ = request->op_id;
52     flags_ = request->flags;
53 
54     start_ = clock::now();
55     state_change_ = start_;
56 }
57 
SetState(const char * new_state)58 void watcher::SetState(const char* new_state) {
59     state_ = new_state;
60     state_change_ = clock::now();
61 }
62 
LogTimeout()63 void watcher::LogTimeout() {
64     if (!triggered_) {
65         triggered_ = true;
66         LOG(ERROR) << "Storageproxyd watchdog triggered: " << id_ << " cmd: " << cmd_
67                    << " op_id: " << op_id_ << " flags: " << flags_;
68     }
69     if (state_) {
70         LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms (" << state_ << " "
71                    << Elapsed(state_change_).count() << "ms)";
72     } else {
73         LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms";
74     }
75 }
76 
LogFinished()77 void watcher::LogFinished() {
78     if (triggered_) {
79         LOG(ERROR) << "...completed: " << Elapsed(clock::now()).count() << "ms";
80     }
81 }
82 
Elapsed(watcher::clock::time_point end)83 std::chrono::milliseconds watcher::Elapsed(watcher::clock::time_point end) {
84     return std::chrono::duration_cast<std::chrono::milliseconds>(end - start_);
85 }
86 
87 namespace {
88 
89 class Watchdog {
90   private:
91     static constexpr std::chrono::milliseconds kDefaultTimeoutMs = std::chrono::milliseconds(500);
92     static constexpr std::chrono::milliseconds kMaxTimeoutMs = std::chrono::seconds(10);
93 
94   public:
Watchdog()95     Watchdog() : watcher_(), done_(false) {}
96     ~Watchdog();
97     struct watcher* RegisterWatch(const char* id, const struct storage_msg* request);
98     void AddProgress(struct watcher* watcher, const char* state);
99     void UnRegisterWatch(struct watcher* watcher);
100 
101   private:
102     // Syncronizes access to watcher_ and watcher_change_ between the main
103     // thread and watchdog loop thread. watcher_ may only be modified by the
104     // main thread; the watchdog loop is read-only.
105     std::mutex watcher_mutex_;
106     std::unique_ptr<struct watcher> watcher_;
107     std::condition_variable watcher_change_;
108 
109     std::thread watchdog_thread_;
110     bool done_;
111 
112     void WatchdogLoop();
113     void LogWatchdogTriggerLocked();
114 };
115 
116 Watchdog gWatchdog;
117 
118 }  // Anonymous namespace
119 
120 // Assumes that caller is single-threaded. If we want to use this from a
121 // multi-threaded context we need to ensure that the watchdog thread is
122 // initialized safely once and accessing an existing watcher is done while the
123 // watcher lock is held.
RegisterWatch(const char * id,const struct storage_msg * request)124 struct watcher* Watchdog::RegisterWatch(const char* id, const struct storage_msg* request) {
125     if (!watchdog_thread_.joinable()) {
126         watchdog_thread_ = std::thread(&Watchdog::WatchdogLoop, this);
127     }
128     if (watcher_) {
129         LOG(ERROR) << "Replacing registered watcher " << watcher_->id_;
130         UnRegisterWatch(watcher_.get());
131     }
132 
133     struct watcher* ret = nullptr;
134     {
135         std::unique_lock<std::mutex> watcherLock(watcher_mutex_);
136         watcher_ = std::make_unique<struct watcher>(id, request);
137         ret = watcher_.get();
138     }
139     watcher_change_.notify_one();
140     return ret;
141 }
142 
UnRegisterWatch(struct watcher * watcher)143 void Watchdog::UnRegisterWatch(struct watcher* watcher) {
144     {
145         std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
146         if (!watcher_) {
147             LOG(ERROR) << "Cannot unregister watcher, no watcher registered";
148             return;
149         }
150         if (watcher_.get() != watcher) {
151             LOG(ERROR) << "Unregistering watcher that doesn't match current watcher";
152         }
153         watcher_->LogFinished();
154         watcher_.reset(nullptr);
155     }
156     watcher_change_.notify_one();
157 }
158 
AddProgress(struct watcher * watcher,const char * state)159 void Watchdog::AddProgress(struct watcher* watcher, const char* state) {
160     std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
161     if (watcher_.get() != watcher) {
162         LOG(ERROR) << "Watcher was not registered, cannot log progress: " << state;
163         return;
164     }
165     watcher->SetState(state);
166 }
167 
WatchdogLoop()168 void Watchdog::WatchdogLoop() {
169     std::unique_lock<std::mutex> lock(watcher_mutex_);
170     std::chrono::milliseconds timeout = kDefaultTimeoutMs;
171 
172     while (!done_) {
173         // wait for a watch to be registered
174         watcher_change_.wait(lock, [this] { return !!watcher_; });
175 
176         // wait for the timeout or unregistration
177         timeout = kDefaultTimeoutMs;
178         do {
179             if (!watcher_change_.wait_for(lock, timeout, [this] { return !watcher_; })) {
180                 watcher_->LogTimeout();
181                 timeout = std::min(timeout * 2, kMaxTimeoutMs);
182             }
183         } while (!!watcher_);
184     }
185 }
186 
~Watchdog()187 Watchdog::~Watchdog() {
188     {
189         std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
190         watcher_.reset(nullptr);
191         done_ = true;
192     }
193     watcher_change_.notify_one();
194     if (watchdog_thread_.joinable()) {
195         watchdog_thread_.join();
196     }
197 }
198 
watch_start(const char * id,const struct storage_msg * request)199 struct watcher* watch_start(const char* id, const struct storage_msg* request) {
200     return gWatchdog.RegisterWatch(id, request);
201 }
202 
watch_progress(struct watcher * watcher,const char * state)203 void watch_progress(struct watcher* watcher, const char* state) {
204     gWatchdog.AddProgress(watcher, state);
205 }
206 
watch_finish(struct watcher * watcher)207 void watch_finish(struct watcher* watcher) {
208     gWatchdog.UnRegisterWatch(watcher);
209 }
210