1# Copyright (c) 2009 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import glob 6import logging 7import re 8import os 9import shutil 10import time 11import utils 12 13from autotest_lib.client.bin import test 14from autotest_lib.client.common_lib import error 15 16class platform_BootPerf(test.test): 17 """Test to gather recorded boot time statistics. 18 19 The primary function of this test is to gather a rather large 20 assortment of performance keyvals that capture timing and disk 21 usage statistics associated with the most recent boot or reboot. 22 23 The test calculates some or all of the following keyvals: 24 * seconds_kernel_to_startup 25 * seconds_kernel_to_startup_done 26 * seconds_kernel_to_chrome_exec 27 * seconds_kernel_to_chrome_main 28 * seconds_kernel_to_signin_start 29 * seconds_kernel_to_signin_wait 30 * seconds_kernel_to_signin_users 31 * seconds_kernel_to_login 32 * seconds_kernel_to_network 33 * seconds_startup_to_chrome_exec 34 * seconds_chrome_exec_to_login 35 * rdbytes_kernel_to_startup 36 * rdbytes_kernel_to_startup_done 37 * rdbytes_kernel_to_chrome_exec 38 * rdbytes_kernel_to_chrome_main 39 * rdbytes_kernel_to_login 40 * rdbytes_startup_to_chrome_exec 41 * rdbytes_chrome_exec_to_login 42 * seconds_power_on_to_kernel 43 * seconds_power_on_to_login 44 * seconds_shutdown_time 45 * seconds_reboot_time 46 * seconds_reboot_error 47 * mhz_primary_cpu 48 49 """ 50 51 version = 2 52 53 # Names of keyvals, their associated bootstat events, and 'Required' flag. 54 # Test fails if a required event is not found. 55 # Each event samples statistics measured since kernel startup 56 # at a specific moment on the boot critical path: 57 # pre-startup - The start of the `chromeos_startup` script; 58 # roughly, the time when /sbin/init emits the `startup` 59 # Upstart event. 60 # post-startup - Completion of the `chromeos_startup` script. 61 # chrome-exec - The moment when session_manager exec's the 62 # first Chrome process. 63 # chrome-main - The moment when the first Chrome process 64 # begins executing in main(). 65 # kernel_to_signin_start - The moment when LoadPage(loginSceenURL) 66 # is called, i.e. initialization starts. 67 # kernel_to_signin_wait - The moment when UI thread has finished signin 68 # screen initialization and now waits until JS sends "ready" event. 69 # kernel_to_signin_users - The moment when UI thread receives "ready" from 70 # JS code. So V8 is initialized and running, etc... 71 # kernel_to_login - The moment when user can actually see signin UI. 72 # kernel_to_android_start - The moment when Android is started. 73 _EVENT_KEYVALS = [ 74 # N.B. Keyval attribute names go into a database that 75 # truncates after 30 characters. The key names below are 76 # prefixed with 8 characters, either 'seconds_' or 77 # 'rdbytes_', so we have 22 characters wiggle room. 78 # 79 # ----+----1----+----2-- 80 ('kernel_to_startup', 'pre-startup', True), 81 ('kernel_to_startup_done', 'post-startup', True), 82 ('kernel_to_chrome_exec', 'chrome-exec', True), 83 ('kernel_to_chrome_main', 'chrome-main', True), 84 # These two events do not happen if device is in OOBE. 85 ('kernel_to_signin_start', 'login-start-signin-screen', False), 86 ('kernel_to_signin_wait', 87 'login-wait-for-signin-state-initialize', False), 88 # This event doesn't happen if device has no users. 89 ('kernel_to_signin_users', 'login-send-user-list', False), 90 ('kernel_to_login', 'login-prompt-visible', True), 91 # Not all boards support ARC. 92 ('kernel_to_android_start', 'android-start', False) 93 ] 94 95 _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0' 96 '/cpufreq/cpuinfo_max_freq') 97 98 _UPTIME_PREFIX = 'uptime-' 99 _DISK_PREFIX = 'disk-' 100 101 _FIRMWARE_TIME_FILE = '/tmp/firmware-boot-time' 102 103 _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*' 104 _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*') 105 _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*') 106 107 # The name of this file has changed starting with linux-3.19. 108 # Use a glob to snarf up all existing records. 109 _RAMOOPS_FILE_GLOB = "/sys/fs/pstore/console-ramoops*" 110 111 112 def _copy_timestamp_files(self): 113 """Copy raw data files to the test results.""" 114 statlist = (glob.glob(self._UPTIME_FILE_GLOB) + 115 glob.glob(self._DISK_FILE_GLOB)) 116 for fname in statlist: 117 shutil.copy(fname, self.resultsdir) 118 try: 119 shutil.copy(self._FIRMWARE_TIME_FILE, self.resultsdir) 120 except Exception: 121 pass 122 123 def _copy_console_ramoops(self): 124 """Copy console_ramoops from previous reboot.""" 125 # If reboot was misbehaving, looking at ramoops may provide clues. 126 for path in glob.glob(self._RAMOOPS_FILE_GLOB): 127 try: 128 shutil.copy(path, self.resultsdir) 129 break 130 except Exception: 131 pass 132 133 def _parse_bootstat(self, filename, fieldnum, required=False): 134 """Read values from a bootstat event file. 135 136 Each line of a bootstat event file represents one occurrence 137 of the event. Each line is a copy of the content of 138 /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat 139 ("disk-" files), captured at the time of the occurrence. 140 For either kind of file, each line is a blank separated list 141 of fields. 142 143 The given event file can contain either uptime or disk data. 144 This function reads all lines (occurrences) in the event 145 file, and returns the value of the given field 146 147 @param filename Filename of the bootstat event. 148 @param fieldnum Which field of the file. 149 @return List of values of `fieldnum` for 150 all occurrences in the file. 151 @raises error.TestFail Raised if the event file is missing, 152 unreadable, or malformed. 153 154 """ 155 try: 156 # crbug.com/1098635: racing with chrome browser 157 # See external/chromium_org/chrome/browser/chromeos/boot_times_loader.cc 158 if required: 159 cnt = 0 160 while cnt < 30: 161 if os.path.exists(filename): 162 break 163 time.sleep(1) 164 cnt += 1 165 166 if cnt : 167 logging.warning("Waited %d seconds for bootstat file: %s", cnt, filename) 168 169 with open(filename) as statfile: 170 values = map(lambda l: float(l.split()[fieldnum]), 171 statfile.readlines()) 172 return values 173 except IOError: 174 raise error.TestFail('Failed to read bootstat file "%s"' % 175 filename) 176 177 178 def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0, required=False): 179 """Return time since boot for a bootstat event. 180 181 @param eventname Name of the bootstat event. 182 @param boostat_dir Directory containing the bootstat 183 files. 184 @param index Index of which occurrence of the event 185 to select. 186 @param required If the parameter is required, wait for it. 187 @return Time since boot for the selected 188 event. 189 190 """ 191 event_file = os.path.join(bootstat_dir, 192 self._UPTIME_PREFIX) + eventname 193 return self._parse_bootstat(event_file, 0)[index] 194 195 196 def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0): 197 """Return sectors read since boot for a bootstat event. 198 199 @param eventname Name of the bootstat event. 200 @param boostat_dir Directory containing the bootstat files. 201 @param index Index of which occurrence of the event 202 to select. 203 @return Number of sectors read since boot for 204 the selected event. 205 206 """ 207 event_file = os.path.join(bootstat_dir, 208 self._DISK_PREFIX) + eventname 209 return self._parse_bootstat(event_file, 2)[index] 210 211 212 def _gather_firmware_boot_time(self, results): 213 """Read and report firmware startup time. 214 215 send-boot-metrics.cong writes the firmware startup time to the 216 file named in `_FIRMWARE_TIME_FILE`. Read the time and record 217 it in `results` as the keyval seconds_power_on_to_kernel. 218 219 @param results Keyvals dictionary. 220 221 """ 222 223 # crbug.com/1098635 - don't race with send-boot-metrics.conf 224 # TODO(grundler): directly read the firmware_time instead of depending 225 # on send-boot-metrics to create _FIRMWARE_TIME_FILE. 226 cnt = 1 227 while cnt < 60: 228 if os.path.exists(self._FIRMWARE_TIME_FILE): 229 break 230 time.sleep(1) 231 cnt += 1 232 233 # If the firmware boot time is not available, the file 234 # will not exist and we should throw an exception here. 235 data = utils.read_one_line(self._FIRMWARE_TIME_FILE) 236 237 firmware_time = float(data) 238 boot_time = results['seconds_kernel_to_login'] 239 results['seconds_power_on_to_kernel'] = firmware_time 240 results['seconds_power_on_to_login'] = ( 241 round(firmware_time + boot_time, 2)) 242 243 244 def _gather_time_keyvals(self, results): 245 """Read and report boot time keyvals. 246 247 Read "seconds since kernel startup" from the bootstat files 248 for the events named in `_EVENT_KEYVALS`, and store the 249 values as perf keyvals. The following keyvals are recorded: 250 * seconds_kernel_to_startup 251 * seconds_kernel_to_startup_done 252 * seconds_kernel_to_chrome_exec 253 * seconds_kernel_to_chrome_main 254 * seconds_kernel_to_login 255 * seconds_kernel_to_network 256 All of these keyvals are considered mandatory, except 257 for seconds_kernel_to_network. 258 259 @param results Keyvals dictionary. 260 @raises error.TestFail Raised if any mandatory keyval can't 261 be determined. 262 263 """ 264 for keyval_name, event_name, required in self._EVENT_KEYVALS: 265 key = 'seconds_' + keyval_name 266 try: 267 results[key] = self._parse_uptime(event_name, required=required) 268 except error.TestFail: 269 if required: 270 raise; 271 272 # Not all 'uptime-network-*-ready' files necessarily exist; 273 # probably there's only one. We go through a list of 274 # possibilities and pick the earliest one we find. We're not 275 # looking for 3G here, so we're not guaranteed to find any 276 # file. 277 network_ready_events = [ 278 'network-wifi-ready', 279 'network-ethernet-ready' 280 ] 281 network_ready_timestamp = float('inf') 282 for event_name in network_ready_events: 283 metric_name = ('seconds_kernel_to_' + 284 event_name.replace('-', '_')) 285 try: 286 network_time = self._parse_uptime(event_name) 287 results[metric_name] = network_time 288 if network_time < network_ready_timestamp: 289 network_ready_timestamp = network_time 290 results['seconds_kernel_to_network'] = network_time 291 except error.TestFail: 292 pass 293 294 295 def _gather_disk_keyvals(self, results): 296 """Read and report disk read keyvals. 297 298 Read "sectors read since kernel startup" from the bootstat 299 files for the events named in `_EVENT_KEYVALS`, convert the 300 values to "bytes read since boot", and store the values as 301 perf keyvals. The following keyvals are recorded: 302 * rdbytes_kernel_to_startup 303 * rdbytes_kernel_to_startup_done 304 * rdbytes_kernel_to_chrome_exec 305 * rdbytes_kernel_to_chrome_main 306 * rdbytes_kernel_to_login 307 308 Disk statistics are reported in units of 512 byte sectors; 309 we convert the keyvals to bytes so that downstream consumers 310 don't have to ask "How big is a sector?". 311 312 @param results Keyvals dictionary. 313 314 """ 315 # We expect an error when reading disk statistics for the 316 # "chrome-main" event because Chrome (not bootstat) generates 317 # that event, and it doesn't include the disk statistics. 318 # We get around that by ignoring all errors. 319 for keyval_name, event_name, required in self._EVENT_KEYVALS: 320 try: 321 key = 'rdbytes_' + keyval_name 322 results[key] = 512 * self._parse_diskstat(event_name) 323 except Exception: 324 pass 325 326 327 def _calculate_timeval(self, event, t0, t1, t_uptime): 328 """Estimate the absolute time of a time since boot. 329 330 Input values `event` and `t_uptime` are times measured as 331 seconds since boot (for the same boot event, as from 332 /proc/uptime). The input values `t0` and `t1` are two 333 values measured as seconds since the epoch. The three "t" 334 values were sampled in the order `t0`, `t_uptime`, `t1`. 335 336 Estimate the time of `event` measured as seconds since the 337 epoch. Also estimate the worst-case error based on the time 338 elapsed between `t0` and `t1`. 339 340 All values are floats. The precision of `event` and 341 `t_uptime` is expected to be kernel jiffies (i.e. one 342 centisecond). The output result is rounded to the nearest 343 jiffy. 344 345 @param event A time to be converted from "seconds since 346 boot" into "seconds since the epoch". 347 @param t0 A reference time earlier than time `t1`, and 348 measured as "seconds since the epoch". 349 @param t1 A reference time later than time `t0`, and 350 measured as "seconds since the epoch". 351 @param t_uptime A reference time measured as "seconds since 352 boot", in between time `t0` and `t1`. 353 354 @return An estimate of the time of `event` measured 355 as seconds since the epoch, rounded to the 356 nearest jiffy. 357 358 """ 359 # Floating point geeks may argue that these calculations 360 # don't guarantee the promised precision: I don't care; 361 # it's good enough. 362 boot_timeval = round((t0 + t1) / 2, 2) - t_uptime 363 error = (t1 - t0) / 2 364 return boot_timeval + event, error 365 366 367 def _gather_reboot_keyvals(self, results): 368 """Read and report shutdown and reboot times. 369 370 The shutdown process saves all bootstat files in /var/log, 371 plus it saves a timestamp file that can be used to convert 372 "time since boot" into times in UTC. Read the saved files 373 from the most recent shutdown, and use them to calculate 374 the time spent from the start of that shutdown until the 375 completion of the most recent boot. Record these keyvals: 376 * seconds_shutdown_time 377 * seconds_reboot_time 378 * seconds_reboot_error 379 380 @param results Keyvals dictionary. 381 382 """ 383 bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB) 384 if not bootstat_archives: 385 return 386 bootstat_dir = max(bootstat_archives) 387 boot_id = open("/proc/sys/kernel/random/boot_id", "r").read() 388 didrun_path = os.path.join(bootstat_dir, "bootperf_ran") 389 if not os.path.exists(didrun_path): 390 with open(didrun_path, "w") as didrun: 391 didrun.write(boot_id) 392 elif open(didrun_path, "r").read() != boot_id: 393 logging.warning("Ignoring reboot based on stale shutdown %s", 394 os.path.basename(bootstat_dir)) 395 return 396 timestamp_path = os.path.join(bootstat_dir, 'timestamp') 397 try: 398 with open(timestamp_path) as timestamp: 399 archive_t0 = float(timestamp.readline()) 400 archive_t1 = float(timestamp.readline()) 401 except IOError: 402 raise error.TestFail('Failed to read "%s"' % timestamp_path) 403 archive_uptime = self._parse_uptime('archive', 404 bootstat_dir=bootstat_dir) 405 shutdown_uptime = self._parse_uptime('ui-post-stop', 406 bootstat_dir=bootstat_dir, 407 index=-1) 408 shutdown_timeval, shutdown_error = self._calculate_timeval( 409 shutdown_uptime, archive_t0, archive_t1, archive_uptime) 410 boot_t0 = time.time() 411 with open('/proc/uptime') as uptime_file: 412 uptime = float(uptime_file.readline().split()[0]) 413 boot_t1 = time.time() 414 boot_timeval, boot_error = self._calculate_timeval( 415 results['seconds_kernel_to_login'], 416 boot_t0, boot_t1, uptime) 417 reboot_time = round(boot_timeval - shutdown_timeval, 2) 418 poweron_time = results['seconds_power_on_to_login'] 419 shutdown_time = round(reboot_time - poweron_time, 2) 420 results['seconds_reboot_time'] = reboot_time 421 results['seconds_reboot_error'] = shutdown_error + boot_error 422 results['seconds_shutdown_time'] = shutdown_time 423 424 425 def _calculate_diff(self, results): 426 barriers = ['startup', 'chrome_exec', 'login'] 427 for i in range(len(barriers) - 1): 428 for type in ['seconds', 'rdbytes']: 429 begin = '%s_kernel_to_%s' % (type, barriers[i]) 430 end = '%s_kernel_to_%s' % (type, barriers[i + 1]) 431 if begin in results and end in results: 432 diff_name = '%s_%s_to_%s' % (type, barriers[i], barriers[i + 1]) 433 results[diff_name] = results[end] - results[begin] 434 435 436 def run_once(self): 437 """Gather boot time statistics. 438 439 Every shutdown and boot creates `bootstat` files with 440 summary statistics for time elapsed and disk usage. Gather 441 the values reported for shutdown, boot time and network 442 startup time, and record them as perf keyvals. 443 444 Additionally, gather information about firmware startup time 445 from various sources, and record the times as perf keyvals. 446 447 Finally, copy the raw data files to the results directory 448 for reference. 449 450 """ 451 # `results` is the keyvals dictionary 452 results = {} 453 454 self._gather_time_keyvals(results) 455 self._gather_disk_keyvals(results) 456 self._gather_firmware_boot_time(results) 457 self._gather_reboot_keyvals(results) 458 self._calculate_diff(results) 459 460 self._copy_timestamp_files() 461 self._copy_console_ramoops() 462 463 self.write_perf_keyval(results) 464 465 if utils.system('crossystem mainfw_type?normal', 466 ignore_status=True) != 0: 467 raise error.TestNAError( 468 'Firmware boot times are not accurate in developer mode. ' 469 'Please run this test in normal mode.') 470 if 'EC returned from reboot' in utils.system_output('cbmem -1'): 471 raise error.TestNAError( 472 'Firmware boot times should be measured without an EC reboot. ' 473 'Please warm reboot this system by running the "reboot" userspace command, then rerun the test.' 474 ) 475 # This is looking for the LB_TAG_SERIAL entry in the coreboot table. 476 # 0x0000000f = tag, 0x00000020 = size, 0x0001c200 = 115200 baud. 477 if re.search('\x0f\0\0\0\x20\0\0\0........\0\xc2\x01\0', 478 utils.system_output('cbmem -r 43425442')): 479 raise error.TestNAError( 480 'Firmware boot times should be measured without serial output. ' 481 'Please rerun this test with a production image (image.bin, not image.dev.bin).' 482 ) 483