• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import errno
7import json
8import mmap
9import optparse
10import os
11import signal
12import sys
13import syslog
14import time
15
16# The prefix of FIFO files used when using background processes.
17RESULT_FIFO_PREFIX = '/tmp/update_engine_performance_monitor_fifo'
18
class UpdateEnginePerformanceMonitor(object):
    """Performance and resource usage monitor script.

    This script is intended to run on the DUT and will dump
    performance data as a JSON document when done. It can be run in
    the background using the --start-bg and --stop-bg options.
    """

    def __init__(self, verbose, timeout_seconds):
        """Instance initializer.

        @param verbose:  if True, prints debug info stderr.

        @param timeout_seconds: maximum amount of time to monitor for.
        """
        self.verbose = verbose
        self.timeout_seconds = timeout_seconds
        # Sampling state. run() resets both before it starts sampling;
        # they are set here as well so that do_sample() and
        # signal_handler() are usable on a freshly built instance.
        self.rss_peak = 0
        self.request_exit = False


    @staticmethod
    def get_update_engine_pids():
        """Gets all processes (tasks) in the update-engine cgroup.

        @return  a list of process identifiers.
        """
        with open('/sys/fs/cgroup/cpu/update-engine/tasks') as f:
            return [int(i) for i in f.read().split()]


    @staticmethod
    def _parse_stat(stat_data):
        """Extracts the command name and RSS from /proc/<pid>/stat data.

        @param stat_data: the full contents of a /proc/<pid>/stat file.

        @return  a tuple (comm, rss_pages) where comm is the command
                 name including its surrounding parentheses and
                 rss_pages is the resident set size in pages.
        """
        # The command name is wrapped in parentheses and may itself
        # contain spaces or parentheses, so it is delimited by the
        # first '(' and the last ')' instead of by whitespace.
        comm_start = stat_data.index('(')
        comm_end = stat_data.rindex(')')
        comm = stat_data[comm_start:comm_end + 1]
        # The remainder starts at the third field of the line (the
        # process state). Counting from one, the resident set size is
        # the 24th field, which is index 21 of the remainder.
        remainder = stat_data[comm_end + 1:].split()
        return comm, int(remainder[21])


    @staticmethod
    def get_info_for_pid(pid, pids_processed):
        """Get information about a process.

        The returned information is a tuple where the first element is
        the RSS size in bytes and the second element is the process
        name (as it appears in the stat file, i.e. in parentheses).
        The task and its siblings (e.g. tasks belonging to the
        same process) will be set in the |pids_processed| set.

        @param pid:            the task to get information about.

        @param pids_processed: set of process identifiers.

        @return                a tuple (rss, comm), or (0, '') if the
                               task no longer exists.
        """
        try:
            with open('/proc/%d/stat' % pid) as f:
                comm, rss_pages = (
                        UpdateEnginePerformanceMonitor._parse_stat(f.read()))
            rss = rss_pages * mmap.PAGESIZE
            # Mark all tasks belonging to the process to avoid
            # double-counting their RSS.
            for t in os.listdir('/proc/%d/task' % pid):
                pids_processed.add(int(t))
            return rss, comm
        except (IOError, OSError) as e:
            # It's possible that the task vanished in the window
            # between reading the 'tasks' file and when attempting to
            # read from it (ditto for iterating over the 'task'
            # directory). Depending on when exactly the task went away
            # this shows up as either ENOENT or ESRCH. Handle both
            # gracefully.
            if e.errno in (errno.ENOENT, errno.ESRCH):
                return 0, ''
            raise


    def do_sample(self):
        """Sampling method.

        This collects information about all the processes in the
        update-engine cgroup. The information is used to e.g. maintain
        historical peaks etc.
        """
        if self.verbose:
            sys.stderr.write('========================================\n')
        rss_total = 0
        pids = self.get_update_engine_pids()
        pids_processed = set()
        # Loop over all PIDs (tasks) in the update-engine cgroup and
        # be careful not to double-count PIDs (tasks) belonging to the
        # same process.
        for pid in pids:
            if pid not in pids_processed:
                rss, comm = self.get_info_for_pid(pid, pids_processed)
                rss_total += rss
                if self.verbose:
                    sys.stderr.write('pid %d %s -> %d KiB\n' %
                                     (pid, comm, rss // 1024))
            else:
                if self.verbose:
                    sys.stderr.write('pid %d already counted\n' % pid)
        self.rss_peak = max(rss_total, self.rss_peak)
        if self.verbose:
            sys.stderr.write('Total = %d KiB\n' % (rss_total // 1024))
            sys.stderr.write('Peak  = %d KiB\n' % (self.rss_peak // 1024))


    def signal_handler(self, signal, frame):
        """Signal handler used to terminate monitoring.

        @param signal: the signal delivered.

        @param frame:  the interrupted stack frame.
        """
        self.request_exit = True


    def run(self, signum):
        """Main sampling loop.

        Periodically sample and process performance data until the
        signal specified by |signum| is sent to the
        process. Returns recorded data as a string.

        @param signum:  the signal to wait (e.g. signal.SIGTERM) or None.

        @return  a string with JSON data or None if the timeout
                 deadline has been exceeded.
        """
        if signum:
            signal.signal(signum, self.signal_handler)
        self.rss_peak = 0
        self.request_exit = False
        timeout_deadline = time.time() + self.timeout_seconds
        while not self.request_exit:
            # Sample through |self| so that the loop works for any
            # instance, not only one bound to a particular global name.
            self.do_sample()
            time.sleep(0.1)
            if time.time() > timeout_deadline:
                return None
        return json.dumps({'rss_peak': self.rss_peak})
150
151
class WriteToSyslog(object):
    """File-like object to log messages to syslog.

    Instances of this object can be assigned to e.g. sys.stderr to log
    errors/backtraces to syslog. Only the write() and flush() parts of
    the file interface are provided.
    """

    def __init__(self, ident):
        """Instance initializer.

        @param ident:  string to identify program by.
        """
        syslog.openlog(ident, syslog.LOG_PID, syslog.LOG_DAEMON)


    def write(self, data):
        """Overridden write() method.

        Every call is forwarded to syslog as one LOG_ERR message.

        @param data:  the data to write.
        """
        syslog.syslog(syslog.LOG_ERR, data)


    def flush(self):
        """Overridden flush() method.

        Nothing is buffered by this object, so there is nothing to do.
        The method exists so that code which flushes the stream it
        writes to keeps working when an instance replaces sys.stderr.
        """
        pass
173
174
def daemonize_and_print_pid_on_stdout():
    """Turns the calling process into a daemon and reports its pid.

    Only the final daemon process returns from this function; the two
    processes above it exit with status 0 along the way, the second
    one after printing the daemon's pid on stdout.

    When this function returns, the process is a properly detached daemon
    process parented by pid 1. This is basically the standard double-fork
    daemonization dance as described in W. Richard Stevens, "Advanced
    Programming in the Unix Environment", 1992, Addison-Wesley, ISBN
    0-201-56317-7
    """
    # First fork: the original process has nothing left to do.
    if os.fork() != 0:
        sys.exit(0)

    # First child: move to the root directory, start a new session
    # and clear the umask before forking again.
    os.chdir('/')
    os.setsid()
    os.umask(0)

    daemon_pid = os.fork()
    if daemon_pid != 0:
        # Still the first child: report the daemon's pid and leave.
        sys.stdout.write('%d\n' % daemon_pid)
        sys.exit(0)

    # Second child, i.e. the daemon. Point the native stdin, stdout
    # and stderr file descriptors at /dev/null.
    null_files = [
        (sys.stdin, open(os.devnull, 'r')),
        (sys.stdout, open(os.devnull, 'a+')),
        (sys.stderr, open(os.devnull, 'a+', 0)),
    ]
    for stream, null_file in null_files:
        os.dup2(null_file.fileno(), stream.fileno())

    # Send stderr to syslog. Note that this will only work for Python
    # code in this process - it will not work for native code or child
    # processes. If this is ever needed, use subprocess.Popen() to
    # spawn logger(1) and connect its stdin fd with the stderr fd in
    # this process.
    sys.stderr = WriteToSyslog('update_engine_performance_monitor.py')
210
211
if __name__ == '__main__':
    # Command line entry point. Three modes are supported: start a
    # background instance (--start-bg), stop one and collect its data
    # (--stop-bg <PID>), or monitor in the foreground (no option).
    parser = optparse.OptionParser()
    parser.add_option('-v', '--verbose', action='store_true',
                      dest='verbose', help='print debug info to stderr')
    parser.add_option('--timeout', action='store', type='int', default=3600,
                      dest='timeout_seconds', metavar='<SECONDS>',
                      help='maximum amount of time to monitor for')
    parser.add_option('--start-bg', action='store_true',
                      dest='start_bg', help='start background instance '
                      'and print its PID on stdout')
    parser.add_option('--stop-bg', action='store', type='int', default=0,
                      dest='stop_bg', metavar='<PID>',
                      help='stop running background instance and dump '
                      'its recorded data')
    (options, args) = parser.parse_args()

    monitor = UpdateEnginePerformanceMonitor(options.verbose,
                                             options.timeout_seconds)
    if options.start_bg:
        # If starting a background instance, fork a child and write
        # its PID on stdout in the parent process. In the child
        # process, setup a FIFO and monitor until SIGTERM is
        # received. When that happens, write the JSON result to the
        # FIFO.
        #
        # Since this is expected to be called via ssh we need to
        # completely detach from the session - otherwise the remote
        # ssh(1) invocation will hang until our background instance is
        # gone.
        daemonize_and_print_pid_on_stdout()
        # Prepare the FIFO ahead of time since it'll serve as an extra
        # sanity check in --stop-bg before sending SIGTERM to the
        # given pid.
        instance_pid = os.getpid()
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        # Remove any leftover from an earlier process with the same pid.
        try:
            os.unlink(fifo_path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        os.mkfifo(fifo_path)
        try:
            # Now monitor.
            sys.stderr.write('Starting background collection.\n')
            json_str = monitor.run(signal.SIGTERM)
            sys.stderr.write('Stopping background collection.\n')
            if json_str:
                # Opening a FIFO for writing blocks until a reader
                # (--stop-bg) has opened the other end.
                with open(fifo_path, 'w') as fifo:
                    fifo.write(json_str)
        finally:
            # Always remove the FIFO, also when monitoring failed. A
            # FIFO left behind would make a later --stop-bg believe
            # that an instance with this pid is still running.
            os.unlink(fifo_path)
    elif options.stop_bg:
        # If stopping a background instance, check that the FIFO is
        # really there and if so, signal the monitoring process and
        # wait for it to write the JSON result on the FIFO.
        instance_pid = options.stop_bg
        fifo_path = RESULT_FIFO_PREFIX + ('-pid-%d' % instance_pid)
        if not os.path.exists(fifo_path):
            sys.stderr.write('No instance with PID %d. Check syslog for '
                             'messages.\n' % instance_pid)
            sys.exit(1)
        try:
            os.kill(instance_pid, signal.SIGTERM)
        except OSError as e:
            if e.errno != errno.ESRCH:
                raise
            # The FIFO exists but its process does not. Waiting on the
            # FIFO would block forever, so report the problem instead.
            sys.stderr.write('No process with PID %d although %s exists. '
                             'Check syslog for messages.\n' %
                             (instance_pid, fifo_path))
            sys.exit(1)
        with open(fifo_path, 'r') as fifo:
            json_str = fifo.read()
        sys.stdout.write(json_str + '\n')
    else:
        # Monitor in foreground until Ctrl+C is pressed, then dump
        # JSON on stdout. This is useful for hacking on this script,
        # especially in conjunction with --verbose.
        json_str = monitor.run(signal.SIGINT)
        if json_str:
            sys.stdout.write(json_str + '\n')
280