#!/usr/bin/python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import errno
import json
import mmap
import optparse
import os
import signal
import sys
import syslog
import time

# The prefix of FIFO files used when using background processes.
RESULT_FIFO_PREFIX = '/tmp/update_engine_performance_monitor_fifo'


class UpdateEnginePerformanceMonitor(object):
    """Performance and resource usage monitor script.

    This script is intended to run on the DUT and will dump
    performance data as a JSON document when done. It can be run in
    the background using the --start-bg and --stop-bg options.
    """

    def __init__(self, verbose, timeout_seconds):
        """Instance initializer.

        @param verbose: if True, prints debug info to stderr.

        @param timeout_seconds: maximum amount of time to monitor for.
        """
        self.verbose = verbose
        self.timeout_seconds = timeout_seconds


    @staticmethod
    def get_update_engine_pids():
        """Gets all processes (tasks) in the update-engine cgroup.

        @return a list of process identifiers.
        """
        with open('/sys/fs/cgroup/cpu/update-engine/tasks') as f:
            return [int(i) for i in f.read().split()]


    @staticmethod
    def get_info_for_pid(pid, pids_processed):
        """Gets information about a process.

        The returned information is a tuple where the first element is
        the RSS size in bytes and the second element is the process
        name. The task and its siblings (i.e. tasks belonging to the
        same process) will be added to the |pids_processed| set.

        @param pid: the task to get information about.

        @param pids_processed: set of process identifiers.

        @return a tuple with information.
        """
        try:
            with open('/proc/%d/stat' % pid) as f:
                fields = f.read().split()
            # Per the proc(5) man page, field 2 is the process name
            # and field 24 is the number of pages in the resident set
            # (the man page numbers fields from 1; |fields| is
            # 0-indexed).
            comm = fields[1]
            rss = int(fields[23]) * mmap.PAGESIZE
            tasks = os.listdir('/proc/%d/task' % pid)
            # Mark all tasks belonging to the process to avoid
            # double-counting their RSS.
            for t in tasks:
                pids_processed.add(int(t))
            return rss, comm
        except (IOError, OSError) as e:
            # It's possible that the task vanished in the window
            # between reading the cgroup 'tasks' file and attempting
            # to read its /proc entry (ditto for iterating over the
            # 'task' directory). Handle this gracefully.
            if e.errno == errno.ENOENT:
                return 0, ''
            raise


    def do_sample(self):
        """Sampling method.

        This collects information about all the processes in the
        update-engine cgroup. The information is used to e.g. maintain
        the historical RSS peak.
        """
        if self.verbose:
            sys.stderr.write('========================================\n')
        rss_total = 0
        pids = self.get_update_engine_pids()
        pids_processed = set()
        # Loop over all PIDs (tasks) in the update-engine cgroup and
        # be careful not to double-count PIDs (tasks) belonging to the
        # same process.
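        # (Threads share their process's address space, so
        # /proc/<tid>/stat reports the whole process's RSS for every
        # task in a thread group; get_info_for_pid() marks all of a
        # process's tasks so its RSS is only counted once.)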
        for pid in pids:
            if pid not in pids_processed:
                rss, comm = self.get_info_for_pid(pid, pids_processed)
                rss_total += rss
                if self.verbose:
                    sys.stderr.write('pid %d %s -> %d KiB\n' %
                                     (pid, comm, rss / 1024))
            else:
                if self.verbose:
                    sys.stderr.write('pid %d already counted\n' % pid)
        self.rss_peak = max(rss_total, self.rss_peak)
        if self.verbose:
            sys.stderr.write('Total = %d KiB\n' % (rss_total / 1024))
            sys.stderr.write('Peak = %d KiB\n' % (self.rss_peak / 1024))


    def signal_handler(self, signum, frame):
        """Signal handler used to terminate monitoring.

        @param signum: the number of the signal delivered.

        @param frame: the interrupted stack frame.
        """
        self.request_exit = True


    def run(self, signum):
        """Main sampling loop.

        Periodically samples and processes performance data until the
        signal specified by |signum| is sent to the process. Returns
        the recorded data as a string.

        @param signum: the signal to wait for (e.g. signal.SIGTERM) or None.

        @return a string with JSON data or None if the timeout
        deadline has been exceeded.
        """
        if signum:
            signal.signal(signum, self.signal_handler)
        self.rss_peak = 0
        self.request_exit = False
        timeout_deadline = time.time() + self.timeout_seconds
        while not self.request_exit:
            self.do_sample()
            time.sleep(0.1)
            if time.time() > timeout_deadline:
                return None
        return json.dumps({'rss_peak': self.rss_peak})


class WriteToSyslog(object):
    """File-like object to log messages to syslog.

    Instances of this object can be assigned to e.g. sys.stderr to log
    errors/backtraces to syslog.
    """

    def __init__(self, ident):
        """Instance initializer.

        @param ident: string to identify the program by.
        """
        syslog.openlog(ident, syslog.LOG_PID, syslog.LOG_DAEMON)


    def write(self, data):
        """Overridden write() method.

        @param data: the data to write.
        """
        syslog.syslog(syslog.LOG_ERR, data)


def daemonize_and_print_pid_on_stdout():
    """Daemonizes the calling process.

    The PID of the resulting daemon process is written to stdout by an
    intermediate process before it exits. When this function returns,
    the caller is running in a properly detached daemon process
    parented by pid 1. This is basically the standard double-fork
    daemonization dance as described in W. Richard Stevens, "Advanced
    Programming in the Unix Environment", 1992, Addison-Wesley, ISBN
    0-201-56317-7.
    """
    first_child = os.fork()
    if first_child != 0:
        # Original process: exit and let the first child continue.
        sys.exit(0)
    os.chdir('/')
    os.setsid()
    os.umask(0)
    second_child = os.fork()
    if second_child != 0:
        # First child: write the daemon's (second child's) pid to
        # stdout and exit.
        print second_child
        sys.exit(0)
    # Redirect the native stdin, stdout and stderr file descriptors to
    # /dev/null.
    si = open(os.devnull, 'r')
    so = open(os.devnull, 'a+')
    se = open(os.devnull, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
    # Send stderr to syslog. Note that this will only work for Python
    # code in this process - it will not work for native code or child
    # processes. If this is ever needed, use subprocess.Popen() to
    # spawn logger(1) and connect its stdin fd with the stderr fd in
    # this process.
    sys.stderr = WriteToSyslog('update_engine_performance_monitor.py')


if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', help='print debug info to stderr')
    parser.add_option('--timeout', action='store', type='int', default=3600,
                      dest='timeout_seconds', metavar='<SECONDS>',
                      help='maximum amount of time to monitor for')
    parser.add_option('--start-bg', action='store_true',
                      dest='start_bg', help='start background instance '
                      'and print its PID on stdout')
    parser.add_option('--stop-bg', action='store', type='int', default=0,
                      dest='stop_bg', metavar='<PID>',
                      help='stop running background instance and dump '
                      'its recorded data')
    (options, args) = parser.parse_args()

    monitor = UpdateEnginePerformanceMonitor(options.verbose,
                                             options.timeout_seconds)
    if options.start_bg:
        # If starting a background instance, daemonize and print the
        # daemon's PID on stdout (from an intermediate process). In
        # the daemon, set up a FIFO and monitor until SIGTERM is
        # delivered. When that happens, write the JSON result to the
        # FIFO.
        #
        # Since this is expected to be called via ssh, we need to
        # completely detach from the session - otherwise the remote
        # ssh(1) invocation will hang until our background instance is
        # gone.
        daemonize_and_print_pid_on_stdout()
        # Prepare the FIFO ahead of time since it'll serve as an extra
        # sanity check in --stop-bg before sending SIGTERM to the
        # given pid.
        instance_pid = os.getpid()
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        if os.path.exists(fifo_path):
            os.unlink(fifo_path)
        os.mkfifo(fifo_path)
        # Now monitor.
        sys.stderr.write('Starting background collection.\n')
        json_str = monitor.run(signal.SIGTERM)
        sys.stderr.write('Stopping background collection.\n')
        if json_str:
            fifo = open(fifo_path, 'w')
            fifo.write(json_str)
            fifo.close()
        os.unlink(fifo_path)
    elif options.stop_bg:
        # If stopping a background instance, check that the FIFO is
        # really there and, if so, signal the monitoring process and
        # wait for it to write the JSON result on the FIFO.
        instance_pid = options.stop_bg
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        if not os.path.exists(fifo_path):
            sys.stderr.write('No instance with PID %d. Check syslog for '
                             'messages.\n' % instance_pid)
            sys.exit(1)
        os.kill(instance_pid, signal.SIGTERM)
        fifo = open(fifo_path, 'r')
        json_str = fifo.read()
        print json_str
        fifo.close()
    else:
        # Monitor in foreground until Ctrl+C is pressed, then dump
        # JSON on stdout. This is useful for hacking on this script,
        # especially in conjunction with --verbose.
        json_str = monitor.run(signal.SIGINT)
        if json_str:
            print json_str
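

# Usage sketch (for reference; the exact invocation is up to the test
# harness, and the script path, PID and values shown below are
# illustrative only):
#
#   # On the DUT, start a detached background monitor; the daemon's
#   # PID is printed on stdout.
#   ./update_engine_performance_monitor.py --start-bg
#   1234
#
#   # ... run the update being measured ...
#
#   # Signal the daemon and read the recorded JSON from its FIFO.
#   ./update_engine_performance_monitor.py --stop-bg 1234
#   {"rss_peak": 59768832}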