• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2009 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import logging
7import re
8import os
9import shutil
10import time
11import utils
12
13from autotest_lib.client.bin import test
14from autotest_lib.client.common_lib import error
15
16class platform_BootPerf(test.test):
17    """Test to gather recorded boot time statistics.
18
19    The primary function of this test is to gather a rather large
20    assortment of performance keyvals that capture timing and disk
21    usage statistics associated with the most recent boot or reboot.
22
23    The test calculates some or all of the following keyvals:
24      * seconds_kernel_to_startup
25      * seconds_kernel_to_startup_done
26      * seconds_kernel_to_chrome_exec
27      * seconds_kernel_to_chrome_main
28      * seconds_kernel_to_signin_start
29      * seconds_kernel_to_signin_wait
30      * seconds_kernel_to_signin_users
31      * seconds_kernel_to_login
32      * seconds_kernel_to_network
33      * seconds_startup_to_chrome_exec
34      * seconds_chrome_exec_to_login
35      * rdbytes_kernel_to_startup
36      * rdbytes_kernel_to_startup_done
37      * rdbytes_kernel_to_chrome_exec
38      * rdbytes_kernel_to_chrome_main
39      * rdbytes_kernel_to_login
40      * rdbytes_startup_to_chrome_exec
41      * rdbytes_chrome_exec_to_login
42      * seconds_power_on_to_kernel
43      * seconds_power_on_to_login
44      * seconds_shutdown_time
45      * seconds_reboot_time
46      * seconds_reboot_error
47      * mhz_primary_cpu
48
49    """
50
51    version = 2
52
53    # Names of keyvals, their associated bootstat events, and 'Required' flag.
54    # Test fails if a required event is not found.
55    # Each event samples statistics measured since kernel startup
56    # at a specific moment on the boot critical path:
57    #   pre-startup - The start of the `chromeos_startup` script;
58    #     roughly, the time when /sbin/init emits the `startup`
59    #     Upstart event.
60    #   post-startup - Completion of the `chromeos_startup` script.
61    #   chrome-exec - The moment when session_manager exec's the
62    #     first Chrome process.
63    #   chrome-main - The moment when the first Chrome process
64    #     begins executing in main().
65    #   kernel_to_signin_start - The moment when LoadPage(loginSceenURL)
66    #     is called, i.e. initialization starts.
67    #   kernel_to_signin_wait - The moment when UI thread has finished signin
68    #     screen initialization and now waits until JS sends "ready" event.
69    #   kernel_to_signin_users - The moment when UI thread receives "ready" from
70    #     JS code. So V8 is initialized and running, etc...
71    #   kernel_to_login - The moment when user can actually see signin UI.
72    #   kernel_to_android_start - The moment when Android is started.
73    _EVENT_KEYVALS = [
74        # N.B.  Keyval attribute names go into a database that
75        # truncates after 30 characters.  The key names below are
76        # prefixed with 8 characters, either 'seconds_' or
77        # 'rdbytes_', so we have 22 characters wiggle room.
78        #
79        # ----+----1----+----2--
80        ('kernel_to_startup',       'pre-startup',                     True),
81        ('kernel_to_startup_done',  'post-startup',                    True),
82        ('kernel_to_chrome_exec',   'chrome-exec',                     True),
83        ('kernel_to_chrome_main',   'chrome-main',                     True),
84        # These two events do not happen if device is in OOBE.
85        ('kernel_to_signin_start',  'login-start-signin-screen',       False),
86        ('kernel_to_signin_wait',
87            'login-wait-for-signin-state-initialize',                  False),
88        # This event doesn't happen if device has no users.
89        ('kernel_to_signin_users',  'login-send-user-list',            False),
90        ('kernel_to_login',         'login-prompt-visible',            True),
91        # Not all boards support ARC.
92        ('kernel_to_android_start', 'android-start',                   False)
93    ]
94
95    _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0'
96                      '/cpufreq/cpuinfo_max_freq')
97
98    _UPTIME_PREFIX = 'uptime-'
99    _DISK_PREFIX = 'disk-'
100
101    _FIRMWARE_TIME_FILE = '/tmp/firmware-boot-time'
102
103    _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*'
104    _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*')
105    _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*')
106
107    # The name of this file has changed starting with linux-3.19.
108    # Use a glob to snarf up all existing records.
109    _RAMOOPS_FILE_GLOB = "/sys/fs/pstore/console-ramoops*"
110
111
112    def _copy_timestamp_files(self):
113        """Copy raw data files to the test results."""
114        statlist = (glob.glob(self._UPTIME_FILE_GLOB) +
115                            glob.glob(self._DISK_FILE_GLOB))
116        for fname in statlist:
117            shutil.copy(fname, self.resultsdir)
118        try:
119            shutil.copy(self._FIRMWARE_TIME_FILE, self.resultsdir)
120        except Exception:
121            pass
122
123    def _copy_console_ramoops(self):
124        """Copy console_ramoops from previous reboot."""
125        # If reboot was misbehaving, looking at ramoops may provide clues.
126        for path in glob.glob(self._RAMOOPS_FILE_GLOB):
127            try:
128                shutil.copy(path, self.resultsdir)
129                break
130            except Exception:
131                pass
132
133    def _parse_bootstat(self, filename, fieldnum, required=False):
134        """Read values from a bootstat event file.
135
136        Each line of a bootstat event file represents one occurrence
137        of the event.  Each line is a copy of the content of
138        /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat
139        ("disk-" files), captured at the time of the occurrence.
140        For either kind of file, each line is a blank separated list
141        of fields.
142
143        The given event file can contain either uptime or disk data.
144        This function reads all lines (occurrences) in the event
145        file, and returns the value of the given field
146
147        @param filename         Filename of the bootstat event.
148        @param fieldnum         Which field of the file.
149        @return                 List of values of `fieldnum` for
150                                all occurrences in the file.
151        @raises error.TestFail  Raised if the event file is missing,
152                                unreadable, or malformed.
153
154        """
155        try:
156            # crbug.com/1098635: racing with chrome browser
157            #  See external/chromium_org/chrome/browser/chromeos/boot_times_loader.cc
158            if required:
159                cnt = 0
160                while cnt < 30:
161                    if os.path.exists(filename):
162                        break
163                    time.sleep(1)
164                    cnt += 1
165
166                if cnt :
167                    logging.warning("Waited %d seconds for bootstat file: %s", cnt, filename)
168
169            with open(filename) as statfile:
170                values = map(lambda l: float(l.split()[fieldnum]),
171                             statfile.readlines())
172            return values
173        except IOError:
174            raise error.TestFail('Failed to read bootstat file "%s"' %
175                                 filename)
176
177
178    def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0, required=False):
179        """Return time since boot for a bootstat event.
180
181        @param eventname        Name of the bootstat event.
182        @param boostat_dir      Directory containing the bootstat
183                                files.
184        @param index            Index of which occurrence of the event
185                                to select.
186        @param required         If the parameter is required, wait for it.
187        @return                 Time since boot for the selected
188                                event.
189
190        """
191        event_file = os.path.join(bootstat_dir,
192                                  self._UPTIME_PREFIX) + eventname
193        return self._parse_bootstat(event_file, 0)[index]
194
195
196    def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0):
197        """Return sectors read since boot for a bootstat event.
198
199        @param eventname        Name of the bootstat event.
200        @param boostat_dir      Directory containing the bootstat files.
201        @param index            Index of which occurrence of the event
202                                to select.
203        @return                 Number of sectors read since boot for
204                                the selected event.
205
206        """
207        event_file = os.path.join(bootstat_dir,
208                                  self._DISK_PREFIX) + eventname
209        return self._parse_bootstat(event_file, 2)[index]
210
211
212    def _gather_firmware_boot_time(self, results):
213        """Read and report firmware startup time.
214
215        send-boot-metrics.cong writes the firmware startup time to the
216        file named in `_FIRMWARE_TIME_FILE`.  Read the time and record
217        it in `results` as the keyval seconds_power_on_to_kernel.
218
219        @param results  Keyvals dictionary.
220
221        """
222
223        # crbug.com/1098635 - don't race with send-boot-metrics.conf
224        # TODO(grundler): directly read the firmware_time instead of depending
225        # on send-boot-metrics to create _FIRMWARE_TIME_FILE.
226        cnt = 1
227        while cnt < 60:
228            if  os.path.exists(self._FIRMWARE_TIME_FILE):
229                break
230            time.sleep(1)
231            cnt += 1
232
233        # If the firmware boot time is not available, the file
234        # will not exist and we should throw an exception here.
235        data = utils.read_one_line(self._FIRMWARE_TIME_FILE)
236
237        firmware_time = float(data)
238        boot_time = results['seconds_kernel_to_login']
239        results['seconds_power_on_to_kernel'] = firmware_time
240        results['seconds_power_on_to_login'] = (
241                round(firmware_time + boot_time, 2))
242
243
244    def _gather_time_keyvals(self, results):
245        """Read and report boot time keyvals.
246
247        Read "seconds since kernel startup" from the bootstat files
248        for the events named in `_EVENT_KEYVALS`, and store the
249        values as perf keyvals.  The following keyvals are recorded:
250          * seconds_kernel_to_startup
251          * seconds_kernel_to_startup_done
252          * seconds_kernel_to_chrome_exec
253          * seconds_kernel_to_chrome_main
254          * seconds_kernel_to_login
255          * seconds_kernel_to_network
256        All of these keyvals are considered mandatory, except
257        for seconds_kernel_to_network.
258
259        @param results          Keyvals dictionary.
260        @raises error.TestFail  Raised if any mandatory keyval can't
261                                be determined.
262
263        """
264        for keyval_name, event_name, required in self._EVENT_KEYVALS:
265            key = 'seconds_' + keyval_name
266            try:
267                results[key] = self._parse_uptime(event_name, required=required)
268            except error.TestFail:
269                if required:
270                    raise;
271
272        # Not all 'uptime-network-*-ready' files necessarily exist;
273        # probably there's only one.  We go through a list of
274        # possibilities and pick the earliest one we find.  We're not
275        # looking for 3G here, so we're not guaranteed to find any
276        # file.
277        network_ready_events = [
278            'network-wifi-ready',
279            'network-ethernet-ready'
280        ]
281        network_ready_timestamp = float('inf')
282        for event_name in network_ready_events:
283            metric_name = ('seconds_kernel_to_' +
284                           event_name.replace('-', '_'))
285            try:
286                network_time = self._parse_uptime(event_name)
287                results[metric_name] = network_time
288                if network_time < network_ready_timestamp:
289                    network_ready_timestamp = network_time
290                    results['seconds_kernel_to_network'] = network_time
291            except error.TestFail:
292                pass
293
294
295    def _gather_disk_keyvals(self, results):
296        """Read and report disk read keyvals.
297
298        Read "sectors read since kernel startup" from the bootstat
299        files for the events named in `_EVENT_KEYVALS`, convert the
300        values to "bytes read since boot", and store the values as
301        perf keyvals.  The following keyvals are recorded:
302          * rdbytes_kernel_to_startup
303          * rdbytes_kernel_to_startup_done
304          * rdbytes_kernel_to_chrome_exec
305          * rdbytes_kernel_to_chrome_main
306          * rdbytes_kernel_to_login
307
308        Disk statistics are reported in units of 512 byte sectors;
309        we convert the keyvals to bytes so that downstream consumers
310        don't have to ask "How big is a sector?".
311
312        @param results  Keyvals dictionary.
313
314        """
315        # We expect an error when reading disk statistics for the
316        # "chrome-main" event because Chrome (not bootstat) generates
317        # that event, and it doesn't include the disk statistics.
318        # We get around that by ignoring all errors.
319        for keyval_name, event_name, required in self._EVENT_KEYVALS:
320            try:
321                key = 'rdbytes_' + keyval_name
322                results[key] = 512 * self._parse_diskstat(event_name)
323            except Exception:
324                pass
325
326
327    def _calculate_timeval(self, event, t0, t1, t_uptime):
328        """Estimate the absolute time of a time since boot.
329
330        Input values `event` and `t_uptime` are times measured as
331        seconds since boot (for the same boot event, as from
332        /proc/uptime).  The input values `t0` and `t1` are two
333        values measured as seconds since the epoch.  The three "t"
334        values were sampled in the order `t0`, `t_uptime`, `t1`.
335
336        Estimate the time of `event` measured as seconds since the
337        epoch.  Also estimate the worst-case error based on the time
338        elapsed between `t0` and `t1`.
339
340        All values are floats.  The precision of `event` and
341        `t_uptime` is expected to be kernel jiffies (i.e. one
342        centisecond).  The output result is rounded to the nearest
343        jiffy.
344
345        @param event    A time to be converted from "seconds since
346                        boot" into "seconds since the epoch".
347        @param t0       A reference time earlier than time `t1`, and
348                        measured as "seconds since the epoch".
349        @param t1       A reference time later than time `t0`, and
350                        measured as "seconds since the epoch".
351        @param t_uptime A reference time measured as "seconds since
352                        boot", in between time `t0` and `t1`.
353
354        @return         An estimate of the time of `event` measured
355                        as seconds since the epoch, rounded to the
356                        nearest jiffy.
357
358        """
359        # Floating point geeks may argue that these calculations
360        # don't guarantee the promised precision:  I don't care;
361        # it's good enough.
362        boot_timeval = round((t0 + t1) / 2, 2) - t_uptime
363        error = (t1 - t0) / 2
364        return boot_timeval + event, error
365
366
367    def _gather_reboot_keyvals(self, results):
368        """Read and report shutdown and reboot times.
369
370        The shutdown process saves all bootstat files in /var/log,
371        plus it saves a timestamp file that can be used to convert
372        "time since boot" into times in UTC.  Read the saved files
373        from the most recent shutdown, and use them to calculate
374        the time spent from the start of that shutdown until the
375        completion of the most recent boot.  Record these keyvals:
376          * seconds_shutdown_time
377          * seconds_reboot_time
378          * seconds_reboot_error
379
380        @param results  Keyvals dictionary.
381
382        """
383        bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB)
384        if not bootstat_archives:
385            return
386        bootstat_dir = max(bootstat_archives)
387        boot_id = open("/proc/sys/kernel/random/boot_id", "r").read()
388        didrun_path = os.path.join(bootstat_dir, "bootperf_ran")
389        if not os.path.exists(didrun_path):
390            with open(didrun_path, "w") as didrun:
391                didrun.write(boot_id)
392        elif open(didrun_path, "r").read() != boot_id:
393            logging.warning("Ignoring reboot based on stale shutdown %s",
394                         os.path.basename(bootstat_dir))
395            return
396        timestamp_path = os.path.join(bootstat_dir, 'timestamp')
397        try:
398            with open(timestamp_path) as timestamp:
399                archive_t0 = float(timestamp.readline())
400                archive_t1 = float(timestamp.readline())
401        except IOError:
402            raise error.TestFail('Failed to read "%s"' % timestamp_path)
403        archive_uptime = self._parse_uptime('archive',
404                                            bootstat_dir=bootstat_dir)
405        shutdown_uptime = self._parse_uptime('ui-post-stop',
406                                             bootstat_dir=bootstat_dir,
407                                             index=-1)
408        shutdown_timeval, shutdown_error = self._calculate_timeval(
409                shutdown_uptime, archive_t0, archive_t1, archive_uptime)
410        boot_t0 = time.time()
411        with open('/proc/uptime') as uptime_file:
412            uptime = float(uptime_file.readline().split()[0])
413        boot_t1 = time.time()
414        boot_timeval, boot_error = self._calculate_timeval(
415                results['seconds_kernel_to_login'],
416                boot_t0, boot_t1, uptime)
417        reboot_time = round(boot_timeval - shutdown_timeval, 2)
418        poweron_time = results['seconds_power_on_to_login']
419        shutdown_time = round(reboot_time - poweron_time, 2)
420        results['seconds_reboot_time'] = reboot_time
421        results['seconds_reboot_error'] = shutdown_error + boot_error
422        results['seconds_shutdown_time'] = shutdown_time
423
424
425    def _calculate_diff(self, results):
426        barriers = ['startup', 'chrome_exec', 'login']
427        for i in range(len(barriers) - 1):
428            for type in ['seconds', 'rdbytes']:
429                begin = '%s_kernel_to_%s' % (type, barriers[i])
430                end = '%s_kernel_to_%s' % (type, barriers[i + 1])
431                if begin in results and end in results:
432                    diff_name = '%s_%s_to_%s' % (type, barriers[i], barriers[i + 1])
433                    results[diff_name] = results[end] - results[begin]
434
435
436    def run_once(self):
437        """Gather boot time statistics.
438
439        Every shutdown and boot creates `bootstat` files with
440        summary statistics for time elapsed and disk usage.  Gather
441        the values reported for shutdown, boot time and network
442        startup time, and record them as perf keyvals.
443
444        Additionally, gather information about firmware startup time
445        from various sources, and record the times as perf keyvals.
446
447        Finally, copy the raw data files to the results directory
448        for reference.
449
450        """
451        # `results` is the keyvals dictionary
452        results = {}
453
454        self._gather_time_keyvals(results)
455        self._gather_disk_keyvals(results)
456        self._gather_firmware_boot_time(results)
457        self._gather_reboot_keyvals(results)
458        self._calculate_diff(results)
459
460        self._copy_timestamp_files()
461        self._copy_console_ramoops()
462
463        self.write_perf_keyval(results)
464
465        if utils.system('crossystem mainfw_type?normal',
466                        ignore_status=True) != 0:
467            raise error.TestNAError(
468                    'Firmware boot times are not accurate in developer mode. '
469                    'Please run this test in normal mode.')
470        if 'EC returned from reboot' in utils.system_output('cbmem -1'):
471            raise error.TestNAError(
472                    'Firmware boot times should be measured without an EC reboot. '
473                    'Please warm reboot this system by running the "reboot" userspace command, then rerun the test.'
474            )
475        # This is looking for the LB_TAG_SERIAL entry in the coreboot table.
476        # 0x0000000f = tag, 0x00000020 = size, 0x0001c200 = 115200 baud.
477        if re.search('\x0f\0\0\0\x20\0\0\0........\0\xc2\x01\0',
478                     utils.system_output('cbmem -r 43425442')):
479            raise error.TestNAError(
480                    'Firmware boot times should be measured without serial output. '
481                    'Please rerun this test with a production image (image.bin, not image.dev.bin).'
482            )
483