• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::fmt;
6 use std::fmt::Write as _;
7 use std::mem;
8 use std::sync::Arc;
9 use std::sync::Mutex;
10 use std::sync::Weak;
11 use std::thread;
12 use std::thread::JoinHandle;
13 use std::time::Duration;
14 
15 use base::error;
16 use base::AsRawDescriptor;
17 use base::Error as SysError;
18 use base::Event;
19 use base::EventToken;
20 use base::FromRawDescriptor;
21 use base::SafeDescriptor;
22 use base::WaitContext;
23 use chrono::DateTime;
24 use chrono::Local;
25 use winapi::shared::minwindef::DWORD;
26 use winapi::shared::minwindef::FILETIME;
27 use winapi::um::processthreadsapi::GetProcessTimes;
28 use winapi::um::processthreadsapi::GetSystemTimes;
29 use winapi::um::processthreadsapi::OpenProcess;
30 use winapi::um::psapi::GetProcessMemoryInfo;
31 use winapi::um::psapi::PROCESS_MEMORY_COUNTERS;
32 use winapi::um::psapi::PROCESS_MEMORY_COUNTERS_EX;
33 use winapi::um::winbase::GetProcessIoCounters;
34 use winapi::um::winnt::IO_COUNTERS;
35 use winapi::um::winnt::LARGE_INTEGER;
36 use winapi::um::winnt::LONGLONG;
37 use winapi::um::winnt::PROCESS_QUERY_LIMITED_INFORMATION;
38 use winapi::um::winnt::PROCESS_VM_READ;
39 use winapi::um::winnt::SYNCHRONIZE;
40 
41 use crate::log_metric;
42 use crate::windows::Error;
43 use crate::windows::Result;
44 use crate::windows::METRIC_UPLOAD_INTERVAL_SECONDS;
45 use crate::MetricEventType;
46 
47 const BYTES_PER_MB: usize = 1024 * 1024;
48 const WORKER_REPORT_INTERVAL: Duration = Duration::from_secs(1);
49 
50 type SysResult<T> = std::result::Result<T, SysError>;
51 
52 /// A worker job which periodically logs system metrics.
53 struct Worker {
54     exit_evt: Event,
55     io: Arc<Mutex<Option<ProcessIoRecord>>>,
56     measurements: Arc<Mutex<Option<Measurements>>>,
57     memory: Arc<Mutex<ProcessMemory>>,
58     memory_acc: Arc<Mutex<Option<ProcessMemoryAccumulated>>>,
59     metrics_string: Arc<Mutex<String>>,
60 }
61 
62 impl Worker {
run(&mut self)63     fn run(&mut self) {
64         #[derive(EventToken)]
65         enum Token {
66             Exit,
67         }
68         let event_ctx: WaitContext<Token> =
69             match WaitContext::build_with(&[(&self.exit_evt, Token::Exit)]) {
70                 Ok(event_ctx) => event_ctx,
71                 Err(e) => {
72                     error!("failed creating WaitContext: {}", e);
73                     return;
74                 }
75             };
76         let mut last_metric_upload_time = Local::now();
77         'poll: loop {
78             let events = match event_ctx.wait_timeout(WORKER_REPORT_INTERVAL) {
79                 Ok(events) => events,
80                 Err(e) => {
81                     error!("failed polling for events: {}", e);
82                     return;
83                 }
84             };
85             if events.is_empty() {
86                 self.collect_metrics();
87                 // Time budget for UI thread is very limited.
88                 // We make the metric string for displaying in UI in the
89                 // worker thread for best performance.
90                 self.make_metrics_string();
91 
92                 self.upload_metrics(&mut last_metric_upload_time);
93             }
94 
95             if events.into_iter().any(|e| e.is_readable) {
96                 break 'poll;
97             }
98         }
99     }
100 
make_metrics_string(&mut self)101     fn make_metrics_string(&mut self) {
102         let mut metrics_string = self.metrics_string.lock().unwrap();
103         *metrics_string = format!(
104             "{}\n{}",
105             self.cpu_metrics_string(),
106             self.mem_metrics_string()
107         );
108     }
109 
upload_metrics(&self, last_metric_upload_time: &mut DateTime<Local>)110     fn upload_metrics(&self, last_metric_upload_time: &mut DateTime<Local>) {
111         let time_elapsed = (Local::now() - *last_metric_upload_time).num_seconds();
112         if time_elapsed >= METRIC_UPLOAD_INTERVAL_SECONDS {
113             let mut memory_acc = self.memory_acc.lock().unwrap();
114             if let Some(acc) = &*memory_acc {
115                 let mem = acc.accumulated.physical / acc.accumulated_count / BYTES_PER_MB;
116                 // The i64 cast will not cause overflow because the mem is at most 10TB for
117                 // Windows 10.
118                 log_metric(MetricEventType::MemoryUsage, mem as i64);
119             }
120             *memory_acc = None;
121 
122             let mut cpu_measurements = self.measurements.lock().unwrap();
123             if let Some(measurements) = &*cpu_measurements {
124                 let sys_time = measurements.current.sys_time;
125                 let process_time = measurements.current.process_time;
126                 let prev_sys_time = measurements.last_upload.sys_time;
127                 let prev_process_time = measurements.last_upload.process_time;
128 
129                 let diff_systime_kernel =
130                     compute_filetime_subtraction(sys_time.kernel, prev_sys_time.kernel);
131                 let diff_systime_user =
132                     compute_filetime_subtraction(sys_time.user, prev_sys_time.user);
133 
134                 let diff_processtime_kernel =
135                     compute_filetime_subtraction(process_time.kernel, prev_process_time.kernel);
136                 let diff_processtime_user =
137                     compute_filetime_subtraction(process_time.user, prev_process_time.user);
138 
139                 let total_systime = diff_systime_kernel + diff_systime_user;
140                 let total_processtime = diff_processtime_kernel + diff_processtime_user;
141 
142                 if total_systime > 0 {
143                     let cpu_usage = 100 * total_processtime / total_systime;
144                     // The i64 cast will not cause overflow because the usage is at most 100.
145                     log_metric(MetricEventType::CpuUsage, cpu_usage as i64);
146                 }
147             }
148             *cpu_measurements = None;
149 
150             let mut io = self.io.lock().unwrap();
151             if let Some(io_record) = &*io {
152                 let new_io_read_bytes =
153                     io_record.current.read_bytes - io_record.last_upload.read_bytes;
154                 let new_io_write_bytes =
155                     io_record.current.write_bytes - io_record.last_upload.write_bytes;
156 
157                 let ms_elapsed =
158                     (io_record.current_time - io_record.last_upload_time).num_milliseconds();
159                 if ms_elapsed > 0 {
160                     let io_read_bytes_per_sec =
161                         (new_io_read_bytes as f32) / (ms_elapsed as f32) * 1000.0;
162                     let io_write_bytes_per_sec =
163                         (new_io_write_bytes as f32) / (ms_elapsed as f32) * 1000.0;
164                     log_metric(MetricEventType::ReadIo, io_read_bytes_per_sec as i64);
165                     log_metric(MetricEventType::WriteIo, io_write_bytes_per_sec as i64);
166                 }
167             }
168             *io = None;
169             *last_metric_upload_time = Local::now();
170         }
171     }
172 
collect_metrics(&mut self)173     fn collect_metrics(&mut self) {
174         match self.get_cpu_metrics() {
175             Ok(new_measurement) => {
176                 let mut measurements = self.measurements.lock().unwrap();
177                 let next_measurements = match *measurements {
178                     Some(Measurements {
179                         current,
180                         last_upload,
181                         ..
182                     }) => Measurements {
183                         current: new_measurement,
184                         previous: current,
185                         last_upload,
186                     },
187                     None => Measurements {
188                         current: new_measurement,
189                         previous: new_measurement,
190                         last_upload: new_measurement,
191                     },
192                 };
193                 *measurements = Some(next_measurements);
194             }
195             Err(e) => {
196                 // Do not panic because of cpu query related failures.
197                 error!("Get cpu measurement failed: {}", e);
198             }
199         }
200 
201         match self.get_mem_metrics() {
202             Ok(mem) => {
203                 // Keep running sum and count to calculate averages.
204                 let mut memory_acc = self.memory_acc.lock().unwrap();
205                 let updated_memory_acc = match *memory_acc {
206                     Some(acc) => accumulate_process_memory(acc, mem),
207                     None => ProcessMemoryAccumulated {
208                         accumulated: mem,
209                         accumulated_count: 1,
210                     },
211                 };
212                 *memory_acc = Some(updated_memory_acc);
213                 *self.memory.lock().unwrap() = mem
214             }
215             Err(e) => {
216                 // Do not panic because of memory query failures.
217                 error!("Get cpu measurement failed: {}", e);
218             }
219         }
220 
221         match self.get_io_metrics() {
222             Ok(new_io) => {
223                 let mut io_record = self.io.lock().unwrap();
224                 let updated_io = match *io_record {
225                     Some(io) => ProcessIoRecord {
226                         current: new_io,
227                         current_time: Local::now(),
228                         last_upload: io.last_upload,
229                         last_upload_time: io.last_upload_time,
230                     },
231                     None => ProcessIoRecord {
232                         current: new_io,
233                         current_time: Local::now(),
234                         last_upload: new_io,
235                         last_upload_time: Local::now(),
236                     },
237                 };
238                 *io_record = Some(updated_io);
239             }
240             Err(e) => {
241                 // Do not panic because of io query failures.
242                 error!("Get io measurement failed: {}", e);
243             }
244         }
245     }
246 
get_mem_metrics(&self) -> SysResult<ProcessMemory>247     fn get_mem_metrics(&self) -> SysResult<ProcessMemory> {
248         let process_handle = CoreWinMetrics::get_process_handle()?;
249 
250         let mut counters = PROCESS_MEMORY_COUNTERS_EX::default();
251 
252         // Safe because we own the process handle and all memory was allocated.
253         let result = unsafe {
254             GetProcessMemoryInfo(
255                 process_handle.as_raw_descriptor(),
256                 // Converting is necessary because the `winapi`' GetProcessMemoryInfo
257                 // does NOT support `PROCESS_MEMORY_COUNTERS_EX`, but only
258                 // 'PROCESS_MEMORY_COUNTERS'. The casting is safe because the underlining
259                 // Windows api does `PROCESS_MEMORY_COUNTERS_EX`.
260                 &mut counters as *mut PROCESS_MEMORY_COUNTERS_EX as *mut PROCESS_MEMORY_COUNTERS,
261                 mem::size_of::<PROCESS_MEMORY_COUNTERS_EX>() as DWORD,
262             )
263         };
264         if result == 0 {
265             return Err(SysError::last());
266         }
267 
268         Ok(ProcessMemory {
269             page_fault_count: counters.PageFaultCount,
270             working_set_size: counters.WorkingSetSize,
271             working_set_peak: counters.PeakWorkingSetSize,
272             page_file_usage: counters.PagefileUsage,
273             page_file_peak: counters.PeakPagefileUsage,
274             physical: counters.PrivateUsage,
275         })
276     }
277 
get_cpu_metrics(&self) -> SysResult<Measurement>278     fn get_cpu_metrics(&self) -> SysResult<Measurement> {
279         let mut sys_time: SystemCpuTime = Default::default();
280         let mut process_time: ProcessCpuTime = Default::default();
281         let sys_time_success: i32;
282 
283         // Safe because memory is allocated for sys_time before the windows call.
284         // And the value were initilized to 0s.
285         unsafe {
286             // First get kernel cpu time.
287             sys_time_success =
288                 GetSystemTimes(&mut sys_time.idle, &mut sys_time.kernel, &mut sys_time.user);
289         }
290         if sys_time_success == 0 {
291             error!("Systime collection failed.\n");
292             return Err(SysError::last());
293         } else {
294             // Query current process cpu time.
295             let process_handle = CoreWinMetrics::get_process_handle()?;
296             let process_time_success: i32;
297             // Safe because memory is allocated for process_time before the windows call.
298             // And the value were initilized to 0s.
299             unsafe {
300                 process_time_success = GetProcessTimes(
301                     process_handle.as_raw_descriptor(),
302                     &mut process_time.create,
303                     &mut process_time.exit,
304                     &mut process_time.kernel,
305                     &mut process_time.user,
306                 );
307             }
308             if process_time_success == 0 {
309                 error!("Systime collection failed.\n");
310                 return Err(SysError::last());
311             }
312         }
313         Ok(Measurement {
314             sys_time: SystemCpuTime {
315                 idle: sys_time.idle,
316                 kernel: sys_time.kernel,
317                 user: sys_time.user,
318             },
319             process_time: ProcessCpuTime {
320                 create: process_time.create,
321                 exit: process_time.exit,
322                 kernel: process_time.kernel,
323                 user: process_time.user,
324             },
325         })
326     }
327 
get_io_metrics(&self) -> SysResult<ProcessIo>328     fn get_io_metrics(&self) -> SysResult<ProcessIo> {
329         let process_handle = CoreWinMetrics::get_process_handle()?;
330         let mut io_counters = IO_COUNTERS::default();
331         // Safe because we own the process handle and all memory was allocated.
332         let result = unsafe {
333             GetProcessIoCounters(
334                 process_handle.as_raw_descriptor(),
335                 &mut io_counters as *mut IO_COUNTERS,
336             )
337         };
338         if result == 0 {
339             return Err(SysError::last());
340         }
341         Ok(ProcessIo {
342             read_bytes: io_counters.ReadTransferCount,
343             write_bytes: io_counters.WriteTransferCount,
344         })
345     }
346 
mem_metrics_string(&self) -> String347     fn mem_metrics_string(&self) -> String {
348         let guard = self.memory.lock().unwrap();
349         let memory: ProcessMemory = *guard;
350         let mut buf = format!(
351             "Physical memory used: {} mb.\n",
352             memory.physical / BYTES_PER_MB
353         );
354         let _ = writeln!(
355             buf,
356             "Total working memory: {} mb.",
357             memory.working_set_size / BYTES_PER_MB
358         );
359         let _ = writeln!(
360             buf,
361             "Peak working memory: {} mb.",
362             memory.working_set_peak / BYTES_PER_MB
363         );
364         let _ = writeln!(buf, "Page fault count: {}.", memory.page_fault_count);
365         let _ = writeln!(
366             buf,
367             "Page file used: {} mb.",
368             memory.page_file_usage / BYTES_PER_MB
369         );
370         let _ = writeln!(
371             buf,
372             "Peak page file used: {} mb.",
373             memory.page_file_peak / BYTES_PER_MB
374         );
375         buf
376     }
377 
cpu_metrics_string(&self) -> String378     fn cpu_metrics_string(&self) -> String {
379         let guard = self.measurements.lock().unwrap();
380         let mut buf = String::new();
381 
382         // Now we use current and last cpu measurment data to calculate cpu usage
383         // as a percentage.
384         if let Some(measurements) = &*guard {
385             let sys_time = measurements.current.sys_time;
386             let process_time = measurements.current.process_time;
387             let prev_sys_time = measurements.previous.sys_time;
388             let prev_process_time = measurements.previous.process_time;
389 
390             let diff_systime_kernel =
391                 compute_filetime_subtraction(sys_time.kernel, prev_sys_time.kernel);
392             let diff_systime_user = compute_filetime_subtraction(sys_time.user, prev_sys_time.user);
393 
394             let diff_processtime_kernel =
395                 compute_filetime_subtraction(process_time.kernel, prev_process_time.kernel);
396             let diff_processtime_user =
397                 compute_filetime_subtraction(process_time.user, prev_process_time.user);
398 
399             let total_systime = diff_systime_kernel + diff_systime_user;
400             let total_processtime = diff_processtime_kernel + diff_processtime_user;
401 
402             let mut process_cpu = String::from("still calculating...");
403             if total_systime > 0 {
404                 process_cpu = format!("{}%", (100 * total_processtime / total_systime));
405             }
406             let _ = writeln!(buf, "Process cpu usage is: {}", process_cpu);
407 
408             #[cfg(debug_assertions)]
409             {
410                 // Show data supporting our cpu usage calculation.
411                 // Output system cpu time.
412                 let _ = writeln!(
413                     buf,
414                     "Systime Idle: low {} / high {}",
415                     sys_time.idle.dwLowDateTime, sys_time.idle.dwHighDateTime
416                 );
417                 let _ = writeln!(
418                     buf,
419                     "Systime User: low {} / high {}",
420                     sys_time.user.dwLowDateTime, sys_time.user.dwHighDateTime
421                 );
422                 let _ = writeln!(
423                     buf,
424                     "Systime kernel: low {} / high {}",
425                     sys_time.kernel.dwLowDateTime, sys_time.kernel.dwHighDateTime
426                 );
427                 // Output process cpu time.
428                 let _ = writeln!(
429                     buf,
430                     "Process Create: low {} / high {}",
431                     process_time.create.dwLowDateTime, process_time.create.dwHighDateTime
432                 );
433                 let _ = writeln!(
434                     buf,
435                     "Process Exit: low {} / high {}",
436                     process_time.exit.dwLowDateTime, process_time.exit.dwHighDateTime
437                 );
438                 let _ = writeln!(
439                     buf,
440                     "Process kernel: low {} / high {}",
441                     process_time.kernel.dwLowDateTime, process_time.kernel.dwHighDateTime
442                 );
443                 let _ = writeln!(
444                     buf,
445                     "Process user: low {} / high {}",
446                     process_time.user.dwLowDateTime, process_time.user.dwHighDateTime
447                 );
448             }
449         } else {
450             let _ = write!(buf, "Calculating cpu usage...");
451         }
452         buf
453     }
454 }
455 
compute_filetime_subtraction(fta: FILETIME, ftb: FILETIME) -> LONGLONG456 fn compute_filetime_subtraction(fta: FILETIME, ftb: FILETIME) -> LONGLONG {
457     // safe because we are initializing the struct to 0s.
458     unsafe {
459         let mut a: LARGE_INTEGER = mem::zeroed::<LARGE_INTEGER>();
460         a.u_mut().LowPart = fta.dwLowDateTime;
461         a.u_mut().HighPart = fta.dwHighDateTime as i32;
462         let mut b: LARGE_INTEGER = mem::zeroed::<LARGE_INTEGER>();
463         b.u_mut().LowPart = ftb.dwLowDateTime;
464         b.u_mut().HighPart = ftb.dwHighDateTime as i32;
465         a.QuadPart() - b.QuadPart()
466     }
467 }
468 
469 // Adds to a running total of memory metrics over the course of a collection period.
470 // Can divide these sums to calculate averages.
accumulate_process_memory( acc: ProcessMemoryAccumulated, mem: ProcessMemory, ) -> ProcessMemoryAccumulated471 fn accumulate_process_memory(
472     acc: ProcessMemoryAccumulated,
473     mem: ProcessMemory,
474 ) -> ProcessMemoryAccumulated {
475     ProcessMemoryAccumulated {
476         accumulated: ProcessMemory {
477             page_fault_count: mem.page_fault_count,
478             working_set_size: acc.accumulated.working_set_size + mem.working_set_size,
479             working_set_peak: mem.working_set_peak,
480             page_file_usage: acc.accumulated.page_file_usage + mem.page_file_usage,
481             page_file_peak: mem.page_file_peak,
482             physical: acc.accumulated.physical + mem.physical,
483         },
484         accumulated_count: acc.accumulated_count + 1,
485     }
486 }
487 
488 #[derive(Copy, Clone, Default)]
489 struct SystemCpuTime {
490     idle: FILETIME,
491     kernel: FILETIME,
492     user: FILETIME,
493 }
494 
495 #[derive(Copy, Clone, Default)]
496 struct ProcessCpuTime {
497     create: FILETIME,
498     exit: FILETIME,
499     kernel: FILETIME,
500     user: FILETIME,
501 }
502 
503 #[derive(Copy, Clone, Default)]
504 struct ProcessMemory {
505     page_fault_count: u32,
506     working_set_size: usize,
507     working_set_peak: usize,
508     page_file_usage: usize,
509     page_file_peak: usize,
510     physical: usize,
511 }
512 
513 #[derive(Copy, Clone)]
514 struct ProcessMemoryAccumulated {
515     accumulated: ProcessMemory,
516     accumulated_count: usize,
517 }
518 
519 #[derive(Copy, Clone, Default)]
520 struct ProcessIo {
521     read_bytes: u64,
522     write_bytes: u64,
523 }
524 
525 #[derive(Copy, Clone)]
526 struct ProcessIoRecord {
527     current: ProcessIo,
528     current_time: DateTime<Local>,
529     last_upload: ProcessIo,
530     last_upload_time: DateTime<Local>,
531 }
532 
533 #[derive(Copy, Clone)]
534 struct Measurement {
535     sys_time: SystemCpuTime,
536     process_time: ProcessCpuTime,
537 }
538 
539 struct Measurements {
540     current: Measurement,
541     previous: Measurement,
542     last_upload: Measurement,
543 }
544 
545 /// A managing struct for a job which defines regular logging of core Windows system metrics.
546 pub(crate) struct CoreWinMetrics {
547     metrics_string: Weak<Mutex<String>>,
548     exit_evt: Event,
549     worker_thread: Option<JoinHandle<()>>,
550 }
551 
552 impl CoreWinMetrics {
new() -> Result<Self>553     pub fn new() -> Result<Self> {
554         let exit_evt = match Event::new() {
555             Ok(evt) => evt,
556             Err(_e) => return Err(Error::CannotInstantiateEvent),
557         };
558 
559         let metrics_string = String::new();
560         let arc_metrics_memory = Arc::new(Mutex::new(metrics_string));
561         let weak_metrics_memory = Arc::downgrade(&arc_metrics_memory);
562 
563         let mut me = Self {
564             metrics_string: weak_metrics_memory,
565             exit_evt,
566             worker_thread: None,
567         };
568         let exit_evt_clone = match me.exit_evt.try_clone() {
569             Ok(evt) => evt,
570             Err(_) => return Err(Error::CannotCloneEvent),
571         };
572         me.worker_thread.replace(thread::spawn(|| {
573             Worker {
574                 exit_evt: exit_evt_clone,
575                 io: Arc::new(Mutex::new(None)),
576                 measurements: Arc::new(Mutex::new(None)),
577                 memory: Arc::new(Mutex::new(Default::default())),
578                 memory_acc: Arc::new(Mutex::new(None)),
579                 metrics_string: arc_metrics_memory,
580             }
581             .run();
582         }));
583         Ok(me)
584     }
585 
get_process_handle() -> SysResult<SafeDescriptor>586     fn get_process_handle() -> SysResult<SafeDescriptor> {
587         // Safe because we own the current process.
588         let process_handle = unsafe {
589             OpenProcess(
590                 PROCESS_QUERY_LIMITED_INFORMATION | PROCESS_VM_READ | SYNCHRONIZE,
591                 0,
592                 std::process::id(),
593             )
594         };
595         if process_handle.is_null() {
596             return Err(SysError::last());
597         }
598         // Safe as the SafeDescriptor is the only thing with access to the handle after this.
599         Ok(unsafe { SafeDescriptor::from_raw_descriptor(process_handle) })
600     }
601 }
602 
603 impl Drop for CoreWinMetrics {
drop(&mut self)604     fn drop(&mut self) {
605         if let Some(join_handle) = self.worker_thread.take() {
606             let _ = self.exit_evt.signal();
607             join_handle
608                 .join()
609                 .expect("fail to join the worker thread of a win core metrics collector.");
610         }
611     }
612 }
613 
614 impl fmt::Display for CoreWinMetrics {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result615     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
616         match self.metrics_string.upgrade() {
617             Some(metrics_string) => write!(f, "{}", *metrics_string.lock().unwrap()),
618             None => write!(f, ""),
619         }
620     }
621 }
622