• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "watchdog.h"
18 
19 #include <chrono>
20 #include <cstdint>
21 #include <optional>
22 #include <thread>
23 #include <vector>
24 
25 #include <android-base/logging.h>
26 
27 struct watcher {
28     watcher(const char* id, const struct storage_msg* request);
29     void SetState(const char* new_state);
30     void LogTimeout();
31     void LogFinished();
32 
33     const char* id_;
34     uint32_t cmd_;
35     uint32_t op_id_;
36     uint32_t flags_;
37     const char* state_;
38 
39     using clock = std::chrono::high_resolution_clock;
40     clock::time_point start_;
41     clock::time_point state_change_;
42     std::chrono::milliseconds Elapsed(clock::time_point end);
43 
44     bool triggered_;
45 };
46 
watcher(const char * id,const struct storage_msg * request)47 watcher::watcher(const char* id, const struct storage_msg* request)
48     : id_(id), state_(nullptr), triggered_(false) {
49     cmd_ = request->cmd;
50     op_id_ = request->op_id;
51     flags_ = request->flags;
52 
53     start_ = clock::now();
54     state_change_ = start_;
55 }
56 
SetState(const char * new_state)57 void watcher::SetState(const char* new_state) {
58     state_ = new_state;
59     state_change_ = clock::now();
60 }
61 
LogTimeout()62 void watcher::LogTimeout() {
63     if (!triggered_) {
64         triggered_ = true;
65         LOG(ERROR) << "Storageproxyd watchdog triggered: " << id_ << " cmd: " << cmd_
66                    << " op_id: " << op_id_ << " flags: " << flags_;
67     }
68     if (state_) {
69         LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms (" << state_ << " "
70                    << Elapsed(state_change_).count() << "ms)";
71     } else {
72         LOG(ERROR) << "...elapsed: " << Elapsed(clock::now()).count() << "ms";
73     }
74 }
75 
LogFinished()76 void watcher::LogFinished() {
77     if (triggered_) {
78         LOG(ERROR) << "...completed: " << Elapsed(clock::now()).count() << "ms";
79     }
80 }
81 
Elapsed(watcher::clock::time_point end)82 std::chrono::milliseconds watcher::Elapsed(watcher::clock::time_point end) {
83     return std::chrono::duration_cast<std::chrono::milliseconds>(end - start_);
84 }
85 
86 namespace {
87 
88 class Watchdog {
89   private:
90     static constexpr std::chrono::milliseconds kDefaultTimeoutMs = std::chrono::milliseconds(500);
91     static constexpr std::chrono::milliseconds kMaxTimeoutMs = std::chrono::seconds(10);
92 
93   public:
Watchdog()94     Watchdog() : watcher_(), done_(false) {}
95     ~Watchdog();
96     struct watcher* RegisterWatch(const char* id, const struct storage_msg* request);
97     void AddProgress(struct watcher* watcher, const char* state);
98     void UnRegisterWatch(struct watcher* watcher);
99 
100   private:
101     // Syncronizes access to watcher_ and watcher_change_ between the main
102     // thread and watchdog loop thread. watcher_ may only be modified by the
103     // main thread; the watchdog loop is read-only.
104     std::mutex watcher_mutex_;
105     std::unique_ptr<struct watcher> watcher_;
106     std::condition_variable watcher_change_;
107 
108     std::thread watchdog_thread_;
109     bool done_;
110 
111     void WatchdogLoop();
112     void LogWatchdogTriggerLocked();
113 };
114 
115 Watchdog gWatchdog;
116 
117 }  // Anonymous namespace
118 
119 // Assumes that caller is single-threaded. If we want to use this from a
120 // multi-threaded context we need to ensure that the watchdog thread is
121 // initialized safely once and accessing an existing watcher is done while the
122 // watcher lock is held.
RegisterWatch(const char * id,const struct storage_msg * request)123 struct watcher* Watchdog::RegisterWatch(const char* id, const struct storage_msg* request) {
124     if (!watchdog_thread_.joinable()) {
125         watchdog_thread_ = std::thread(&Watchdog::WatchdogLoop, this);
126     }
127     if (watcher_) {
128         LOG(ERROR) << "Replacing registered watcher " << watcher_->id_;
129         UnRegisterWatch(watcher_.get());
130     }
131 
132     struct watcher* ret = nullptr;
133     {
134         std::unique_lock<std::mutex> watcherLock(watcher_mutex_);
135         watcher_ = std::make_unique<struct watcher>(id, request);
136         ret = watcher_.get();
137     }
138     watcher_change_.notify_one();
139     return ret;
140 }
141 
UnRegisterWatch(struct watcher * watcher)142 void Watchdog::UnRegisterWatch(struct watcher* watcher) {
143     {
144         std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
145         if (!watcher_) {
146             LOG(ERROR) << "Cannot unregister watcher, no watcher registered";
147             return;
148         }
149         if (watcher_.get() != watcher) {
150             LOG(ERROR) << "Unregistering watcher that doesn't match current watcher";
151         }
152         watcher_->LogFinished();
153         watcher_.reset(nullptr);
154     }
155     watcher_change_.notify_one();
156 }
157 
AddProgress(struct watcher * watcher,const char * state)158 void Watchdog::AddProgress(struct watcher* watcher, const char* state) {
159     std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
160     if (watcher_.get() != watcher) {
161         LOG(ERROR) << "Watcher was not registered, cannot log progress: " << state;
162         return;
163     }
164     watcher->SetState(state);
165 }
166 
WatchdogLoop()167 void Watchdog::WatchdogLoop() {
168     std::unique_lock<std::mutex> lock(watcher_mutex_);
169     std::chrono::milliseconds timeout = kDefaultTimeoutMs;
170 
171     while (!done_) {
172         // wait for a watch to be registered
173         watcher_change_.wait(lock, [this] { return !!watcher_; });
174 
175         // wait for the timeout or unregistration
176         timeout = kDefaultTimeoutMs;
177         do {
178             if (!watcher_change_.wait_for(lock, timeout, [this] { return !watcher_; })) {
179                 watcher_->LogTimeout();
180                 timeout = std::min(timeout * 2, kMaxTimeoutMs);
181             }
182         } while (!!watcher_);
183     }
184 }
185 
~Watchdog()186 Watchdog::~Watchdog() {
187     {
188         std::lock_guard<std::mutex> watcherLock(watcher_mutex_);
189         watcher_.reset(nullptr);
190         done_ = true;
191     }
192     watcher_change_.notify_one();
193     if (watchdog_thread_.joinable()) {
194         watchdog_thread_.join();
195     }
196 }
197 
watch_start(const char * id,const struct storage_msg * request)198 struct watcher* watch_start(const char* id, const struct storage_msg* request) {
199     return gWatchdog.RegisterWatch(id, request);
200 }
201 
watch_progress(struct watcher * watcher,const char * state)202 void watch_progress(struct watcher* watcher, const char* state) {
203     gWatchdog.AddProgress(watcher, state);
204 }
205 
watch_finish(struct watcher * watcher)206 void watch_finish(struct watcher* watcher) {
207     gWatchdog.UnRegisterWatch(watcher);
208 }
209