1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "watchdog.h"
18
19 #include <chrono>
20 #include <cstdint>
21 #include <optional>
22 #include <thread>
23 #include <vector>
24
25 #include <android-base/logging.h>
26
27 struct watcher {
28 watcher(const char* id, const struct storage_msg* request);
29 void SetState(const char* new_state);
30 void LogTimeout();
31 void LogFinished();
32
33 const char* id_;
34 uint32_t cmd_;
35 uint32_t op_id_;
36 uint32_t flags_;
37 const char* state_;
38
39 using clock = std::chrono::high_resolution_clock;
40 clock::time_point start_;
41 clock::time_point state_change_;
42 std::chrono::milliseconds Elapsed(clock::time_point end);
43
44 bool triggered_;
45 };
46
watcher(const char * id,const struct storage_msg * request)47 watcher::watcher(const char* id, const struct storage_msg* request)
48 : id_(id), state_(nullptr), triggered_(false) {
49 cmd_ = request->cmd;
50 op_id_ = request->op_id;
51 flags_ = request->flags;
52
53 start_ = clock::now();
54 state_change_ = start_;
55 }
56
SetState(const char * new_state)57 void watcher::SetState(const char* new_state) {
58 state_ = new_state;
59 state_change_ = clock::now();
60 }
61
LogTimeout()62 void watcher::LogTimeout() {
63 if (!triggered_) {
64 triggered_ = true;
65 LOG(ERROR) << "Storageproxyd watchdog triggered: " << id_ << " cmd: " << cmd_
66 << " op_id: " << op_id_ << " flags: " << flags_;
67 }
68 if (state_) {
69 LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms (" << state_ << " "
70 << Elapsed(state_change_).count() << "ms)";
71 } else {
72 LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms";
73 }
74 }
75
LogFinished()76 void watcher::LogFinished() {
77 if (triggered_) {
78 LOG(ERROR) << "...completed: " << Elapsed(clock::now()).count() << "ms";
79 }
80 }
81
Elapsed(watcher::clock::time_point end)82 std::chrono::milliseconds watcher::Elapsed(watcher::clock::time_point end) {
83 return std::chrono::duration_cast<std::chrono::milliseconds>(end - start_);
84 }
85
86 namespace {
87
88 class Watchdog {
89 private:
90 static constexpr std::chrono::milliseconds kDefaultTimeoutMs = std::chrono::milliseconds(500);
91 static constexpr std::chrono::milliseconds kMaxTimeoutMs = std::chrono::seconds(10);
92
93 public:
Watchdog()94 Watchdog() : watcher_(), done_(false) {}
95 ~Watchdog();
96 struct watcher* RegisterWatch(const char* id, const struct storage_msg* request);
97 void AddProgress(struct watcher* watcher, const char* state);
98 void UnRegisterWatch(struct watcher* watcher);
99
100 private:
101 // Syncronizes access to watcher_ and watcher_change_ between the main
102 // thread and watchdog loop thread. watcher_ may only be modified by the
103 // main thread; the watchdog loop is read-only.
104 std::mutex watcher_mutex_;
105 std::unique_ptr<struct watcher> watcher_;
106 std::condition_variable watcher_change_;
107
108 std::thread watchdog_thread_;
109 bool done_;
110
111 void WatchdogLoop();
112 void LogWatchdogTriggerLocked();
113 };
114
115 Watchdog gWatchdog;
116
117 } // Anonymous namespace
118
119 // Assumes that caller is single-threaded. If we want to use this from a
120 // multi-threaded context we need to ensure that the watchdog thread is
121 // initialized safely once and accessing an existing watcher is done while the
122 // watcher lock is held.
RegisterWatch(const char * id,const struct storage_msg * request)123 struct watcher* Watchdog::RegisterWatch(const char* id, const struct storage_msg* request) {
124 if (!watchdog_thread_.joinable()) {
125 watchdog_thread_ = std::thread(&Watchdog::WatchdogLoop, this);
126 }
127 if (watcher_) {
128 LOG(ERROR) << "Replacing registered watcher " << watcher_->id_;
129 UnRegisterWatch(watcher_.get());
130 }
131
132 struct watcher* ret = nullptr;
133 {
134 std::unique_lock<std::mutex> watcherLock(watcher_mutex_);
135 watcher_ = std::make_unique<struct watcher>(id, request);
136 ret = watcher_.get();
137 }
138 watcher_change_.notify_one();
139 return ret;
140 }
141
UnRegisterWatch(struct watcher * watcher)142 void Watchdog::UnRegisterWatch(struct watcher* watcher) {
143 {
144 std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
145 if (!watcher_) {
146 LOG(ERROR) << "Cannot unregister watcher, no watcher registered";
147 return;
148 }
149 if (watcher_.get() != watcher) {
150 LOG(ERROR) << "Unregistering watcher that doesn't match current watcher";
151 }
152 watcher_->LogFinished();
153 watcher_.reset(nullptr);
154 }
155 watcher_change_.notify_one();
156 }
157
AddProgress(struct watcher * watcher,const char * state)158 void Watchdog::AddProgress(struct watcher* watcher, const char* state) {
159 std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
160 if (watcher_.get() != watcher) {
161 LOG(ERROR) << "Watcher was not registered, cannot log progress: " << state;
162 return;
163 }
164 watcher->SetState(state);
165 }
166
WatchdogLoop()167 void Watchdog::WatchdogLoop() {
168 std::unique_lock<std::mutex> lock(watcher_mutex_);
169 std::chrono::milliseconds timeout = kDefaultTimeoutMs;
170
171 while (!done_) {
172 // wait for a watch to be registered
173 watcher_change_.wait(lock, [this] { return !!watcher_; });
174
175 // wait for the timeout or unregistration
176 timeout = kDefaultTimeoutMs;
177 do {
178 if (!watcher_change_.wait_for(lock, timeout, [this] { return !watcher_; })) {
179 watcher_->LogTimeout();
180 timeout = std::min(timeout * 2, kMaxTimeoutMs);
181 }
182 } while (!!watcher_);
183 }
184 }
185
~Watchdog()186 Watchdog::~Watchdog() {
187 {
188 std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
189 watcher_.reset(nullptr);
190 done_ = true;
191 }
192 watcher_change_.notify_one();
193 if (watchdog_thread_.joinable()) {
194 watchdog_thread_.join();
195 }
196 }
197
watch_start(const char * id,const struct storage_msg * request)198 struct watcher* watch_start(const char* id, const struct storage_msg* request) {
199 return gWatchdog.RegisterWatch(id, request);
200 }
201
watch_progress(struct watcher * watcher,const char * state)202 void watch_progress(struct watcher* watcher, const char* state) {
203 gWatchdog.AddProgress(watcher, state);
204 }
205
watch_finish(struct watcher * watcher)206 void watch_finish(struct watcher* watcher) {
207 gWatchdog.UnRegisterWatch(watcher);
208 }
209