1 // Copyright 2021, The Android Open Source Project 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Can be removed when instrumentations are added to keystore. 16 #![allow(dead_code)] 17 18 //! This module implements a watchdog thread. 19 20 use std::{ 21 cmp::min, 22 collections::HashMap, 23 sync::Arc, 24 sync::{Condvar, Mutex, MutexGuard}, 25 thread, 26 }; 27 use std::{ 28 marker::PhantomData, 29 time::{Duration, Instant}, 30 }; 31 32 /// Represents a Watchdog record. It can be created with `Watchdog::watch` or 33 /// `Watchdog::watch_with`. It disarms the record when dropped. 34 pub struct WatchPoint { 35 id: &'static str, 36 wd: Arc<Watchdog>, 37 not_send: PhantomData<*mut ()>, // WatchPoint must not be Send. 38 } 39 40 impl Drop for WatchPoint { drop(&mut self)41 fn drop(&mut self) { 42 self.wd.disarm(self.id) 43 } 44 } 45 46 #[derive(Debug, PartialEq, Eq)] 47 enum State { 48 NotRunning, 49 Running, 50 } 51 52 #[derive(Debug, Clone, Hash, PartialEq, Eq)] 53 struct Index { 54 tid: thread::ThreadId, 55 id: &'static str, 56 } 57 58 struct Record { 59 started: Instant, 60 deadline: Instant, 61 callback: Option<Box<dyn Fn() -> String + Send + 'static>>, 62 } 63 64 struct WatchdogState { 65 state: State, 66 thread: Option<thread::JoinHandle<()>>, 67 timeout: Duration, 68 records: HashMap<Index, Record>, 69 last_report: Instant, 70 has_overdue: bool, 71 } 72 73 impl WatchdogState { update_overdue_and_find_next_timeout(&mut self) -> (bool, Option<Duration>)74 fn update_overdue_and_find_next_timeout(&mut self) -> (bool, Option<Duration>) { 75 let now = Instant::now(); 76 let mut next_timeout: Option<Duration> = None; 77 let mut has_overdue = false; 78 for (_, r) in self.records.iter() { 79 let timeout = r.deadline.saturating_duration_since(now); 80 if timeout == Duration::new(0, 0) { 81 has_overdue = true; 82 continue; 83 } 84 next_timeout = match next_timeout { 85 Some(nt) => { 86 if timeout < nt { 87 Some(timeout) 88 } else { 89 Some(nt) 90 } 91 } 92 None => Some(timeout), 93 }; 94 } 95 (has_overdue, next_timeout) 96 } 97 log_report(&mut self, has_overdue: bool) -> bool98 fn log_report(&mut self, has_overdue: bool) -> bool { 99 match (self.has_overdue, has_overdue) { 100 (true, true) => { 101 if self.last_report.elapsed() < Watchdog::NOISY_REPORT_TIMEOUT { 102 self.has_overdue = false; 103 return false; 104 } 105 } 106 (_, false) => { 107 self.has_overdue = false; 108 return false; 109 } 110 (false, true) => {} 111 } 112 self.last_report = Instant::now(); 113 self.has_overdue = has_overdue; 114 log::warn!("Keystore Watchdog report:"); 115 log::warn!("Overdue records:"); 116 let now = Instant::now(); 117 for (i, r) in self.records.iter() { 118 if r.deadline.saturating_duration_since(now) == Duration::new(0, 0) { 119 match &r.callback { 120 Some(cb) => { 121 log::warn!( 122 "{:?} {} Pending: {:?} Overdue {:?}: {}", 123 i.tid, 124 i.id, 125 r.started.elapsed(), 126 r.deadline.elapsed(), 127 (cb)() 128 ); 129 } 130 None => { 131 log::warn!( 132 "{:?} {} Pending: {:?} Overdue {:?}", 133 i.tid, 134 i.id, 135 r.started.elapsed(), 136 r.deadline.elapsed() 137 ); 138 } 139 } 140 } 141 } 142 true 143 } 144 disarm(&mut self, index: Index)145 fn disarm(&mut self, index: Index) { 146 self.records.remove(&index); 147 } 148 arm(&mut self, index: Index, record: Record)149 fn arm(&mut self, index: Index, record: Record) { 150 if self.records.insert(index.clone(), record).is_some() { 151 log::warn!("Recursive watchdog record at \"{:?}\" replaces previous record.", index); 152 } 153 } 154 } 155 156 /// Watchdog spawns a thread that logs records of all overdue watch points when a deadline 157 /// is missed and at least every second as long as overdue watch points exist. 158 /// The thread terminates when idle for a given period of time. 159 pub struct Watchdog { 160 state: Arc<(Condvar, Mutex<WatchdogState>)>, 161 } 162 163 impl Watchdog { 164 /// If we have overdue records, we want to be noisy about it and log a report 165 /// at least every `NOISY_REPORT_TIMEOUT` interval. 166 const NOISY_REPORT_TIMEOUT: Duration = Duration::from_secs(1); 167 168 /// Construct a [`Watchdog`]. When `timeout` has elapsed since the watchdog thread became 169 /// idle, i.e., there are no more active or overdue watch points, the watchdog thread 170 /// terminates. new(timeout: Duration) -> Arc<Self>171 pub fn new(timeout: Duration) -> Arc<Self> { 172 Arc::new(Self { 173 state: Arc::new(( 174 Condvar::new(), 175 Mutex::new(WatchdogState { 176 state: State::NotRunning, 177 thread: None, 178 timeout, 179 records: HashMap::new(), 180 last_report: Instant::now(), 181 has_overdue: false, 182 }), 183 )), 184 }) 185 } 186 watch_with_optional( wd: &Arc<Self>, callback: Option<Box<dyn Fn() -> String + Send + 'static>>, id: &'static str, timeout: Duration, ) -> Option<WatchPoint>187 fn watch_with_optional( 188 wd: &Arc<Self>, 189 callback: Option<Box<dyn Fn() -> String + Send + 'static>>, 190 id: &'static str, 191 timeout: Duration, 192 ) -> Option<WatchPoint> { 193 let deadline = Instant::now().checked_add(timeout); 194 if deadline.is_none() { 195 log::warn!("Deadline computation failed for WatchPoint \"{}\"", id); 196 log::warn!("WatchPoint not armed."); 197 return None; 198 } 199 wd.arm(callback, id, deadline.unwrap()); 200 Some(WatchPoint { id, wd: wd.clone(), not_send: Default::default() }) 201 } 202 203 /// Create a new watch point. If the WatchPoint is not dropped before the timeout 204 /// expires, a report is logged at least every second, which includes the id string 205 /// and whatever string the callback returns. watch_with( wd: &Arc<Self>, id: &'static str, timeout: Duration, callback: impl Fn() -> String + Send + 'static, ) -> Option<WatchPoint>206 pub fn watch_with( 207 wd: &Arc<Self>, 208 id: &'static str, 209 timeout: Duration, 210 callback: impl Fn() -> String + Send + 'static, 211 ) -> Option<WatchPoint> { 212 Self::watch_with_optional(wd, Some(Box::new(callback)), id, timeout) 213 } 214 215 /// Like `watch_with`, but without a callback. watch(wd: &Arc<Self>, id: &'static str, timeout: Duration) -> Option<WatchPoint>216 pub fn watch(wd: &Arc<Self>, id: &'static str, timeout: Duration) -> Option<WatchPoint> { 217 Self::watch_with_optional(wd, None, id, timeout) 218 } 219 arm( &self, callback: Option<Box<dyn Fn() -> String + Send + 'static>>, id: &'static str, deadline: Instant, )220 fn arm( 221 &self, 222 callback: Option<Box<dyn Fn() -> String + Send + 'static>>, 223 id: &'static str, 224 deadline: Instant, 225 ) { 226 let tid = thread::current().id(); 227 let index = Index { tid, id }; 228 let record = Record { started: Instant::now(), deadline, callback }; 229 230 let (ref condvar, ref state) = *self.state; 231 232 let mut state = state.lock().unwrap(); 233 state.arm(index, record); 234 235 if state.state != State::Running { 236 self.spawn_thread(&mut state); 237 } 238 drop(state); 239 condvar.notify_all(); 240 } 241 disarm(&self, id: &'static str)242 fn disarm(&self, id: &'static str) { 243 let tid = thread::current().id(); 244 let index = Index { tid, id }; 245 let (_, ref state) = *self.state; 246 247 let mut state = state.lock().unwrap(); 248 state.disarm(index); 249 // There is no need to notify condvar. There is no action required for the 250 // watchdog thread before the next deadline. 251 } 252 spawn_thread(&self, state: &mut MutexGuard<WatchdogState>)253 fn spawn_thread(&self, state: &mut MutexGuard<WatchdogState>) { 254 if let Some(t) = state.thread.take() { 255 t.join().expect("Watchdog thread panicked."); 256 } 257 258 let cloned_state = self.state.clone(); 259 260 state.thread = Some(thread::spawn(move || { 261 let (ref condvar, ref state) = *cloned_state; 262 263 let mut state = state.lock().unwrap(); 264 265 loop { 266 let (has_overdue, next_timeout) = state.update_overdue_and_find_next_timeout(); 267 state.log_report(has_overdue); 268 let (next_timeout, idle) = match (has_overdue, next_timeout) { 269 (true, Some(next_timeout)) => { 270 (min(next_timeout, Self::NOISY_REPORT_TIMEOUT), false) 271 } 272 (false, Some(next_timeout)) => (next_timeout, false), 273 (true, None) => (Self::NOISY_REPORT_TIMEOUT, false), 274 (false, None) => (state.timeout, true), 275 }; 276 277 let (s, timeout) = condvar.wait_timeout(state, next_timeout).unwrap(); 278 state = s; 279 280 if idle && timeout.timed_out() && state.records.is_empty() { 281 state.state = State::NotRunning; 282 break; 283 } 284 } 285 log::info!("Watchdog thread idle -> terminating. Have a great day."); 286 })); 287 state.state = State::Running; 288 } 289 } 290 291 #[cfg(test)] 292 mod tests { 293 294 use super::*; 295 use std::sync::atomic; 296 use std::thread; 297 use std::time::Duration; 298 299 #[test] test_watchdog()300 fn test_watchdog() { 301 android_logger::init_once( 302 android_logger::Config::default() 303 .with_tag("keystore2_watchdog_tests") 304 .with_min_level(log::Level::Debug), 305 ); 306 307 let wd = Watchdog::new(Watchdog::NOISY_REPORT_TIMEOUT.checked_mul(3).unwrap()); 308 let hit_count = Arc::new(atomic::AtomicU8::new(0)); 309 let hit_count_clone = hit_count.clone(); 310 let wp = 311 Watchdog::watch_with(&wd, "test_watchdog", Duration::from_millis(100), move || { 312 format!("hit_count: {}", hit_count_clone.fetch_add(1, atomic::Ordering::Relaxed)) 313 }); 314 assert_eq!(0, hit_count.load(atomic::Ordering::Relaxed)); 315 thread::sleep(Duration::from_millis(500)); 316 assert_eq!(1, hit_count.load(atomic::Ordering::Relaxed)); 317 thread::sleep(Watchdog::NOISY_REPORT_TIMEOUT); 318 assert_eq!(2, hit_count.load(atomic::Ordering::Relaxed)); 319 drop(wp); 320 thread::sleep(Watchdog::NOISY_REPORT_TIMEOUT.checked_mul(4).unwrap()); 321 assert_eq!(2, hit_count.load(atomic::Ordering::Relaxed)); 322 let (_, ref state) = *wd.state; 323 let state = state.lock().unwrap(); 324 assert_eq!(state.state, State::NotRunning); 325 } 326 } 327