1# Lint as: python2, python3 2# Copyright (c) 2009 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6import glob 7import logging 8import re 9import os 10import shutil 11import time 12import utils 13 14from autotest_lib.client.bin import test 15from autotest_lib.client.common_lib import error 16 17class platform_BootPerf(test.test): 18 """Test to gather recorded boot time statistics. 19 20 The primary function of this test is to gather a rather large 21 assortment of performance keyvals that capture timing and disk 22 usage statistics associated with the most recent boot or reboot. 23 24 The test calculates some or all of the following keyvals: 25 * seconds_kernel_to_startup 26 * seconds_kernel_to_startup_done 27 * seconds_kernel_to_chrome_exec 28 * seconds_kernel_to_chrome_main 29 * seconds_kernel_to_signin_start 30 * seconds_kernel_to_signin_wait 31 * seconds_kernel_to_signin_users 32 * seconds_kernel_to_login 33 * seconds_kernel_to_network 34 * seconds_startup_to_chrome_exec 35 * seconds_chrome_exec_to_login 36 * rdbytes_kernel_to_startup 37 * rdbytes_kernel_to_startup_done 38 * rdbytes_kernel_to_chrome_exec 39 * rdbytes_kernel_to_chrome_main 40 * rdbytes_kernel_to_login 41 * rdbytes_startup_to_chrome_exec 42 * rdbytes_chrome_exec_to_login 43 * seconds_power_on_to_kernel 44 * seconds_power_on_to_login 45 * seconds_shutdown_time 46 * seconds_reboot_time 47 * seconds_reboot_error 48 * mhz_primary_cpu 49 50 """ 51 52 version = 2 53 54 # Names of keyvals, their associated bootstat events, and 'Required' flag. 55 # Test fails if a required event is not found. 56 # Each event samples statistics measured since kernel startup 57 # at a specific moment on the boot critical path: 58 # pre-startup - The start of the `chromeos_startup` script; 59 # roughly, the time when /sbin/init emits the `startup` 60 # Upstart event. 61 # post-startup - Completion of the `chromeos_startup` script. 62 # chrome-exec - The moment when session_manager exec's the 63 # first Chrome process. 64 # chrome-main - The moment when the first Chrome process 65 # begins executing in main(). 66 # kernel_to_signin_start - The moment when LoadPage(loginSceenURL) 67 # is called, i.e. initialization starts. 68 # kernel_to_signin_wait - The moment when UI thread has finished signin 69 # screen initialization and now waits until JS sends "ready" event. 70 # kernel_to_signin_users - The moment when UI thread receives "ready" from 71 # JS code. So V8 is initialized and running, etc... 72 # kernel_to_login - The moment when user can actually see signin UI. 73 # kernel_to_android_start - The moment when Android is started. 74 _EVENT_KEYVALS = [ 75 # N.B. Keyval attribute names go into a database that 76 # truncates after 30 characters. The key names below are 77 # prefixed with 8 characters, either 'seconds_' or 78 # 'rdbytes_', so we have 22 characters wiggle room. 79 # 80 # ----+----1----+----2-- 81 ('kernel_to_startup', 'pre-startup', True), 82 ('kernel_to_startup_done', 'post-startup', True), 83 ('kernel_to_chrome_exec', 'chrome-exec', True), 84 ('kernel_to_chrome_main', 'chrome-main', True), 85 # These two events do not happen if device is in OOBE. 86 ('kernel_to_signin_start', 'login-start-signin-screen', False), 87 ('kernel_to_signin_wait', 88 'login-wait-for-signin-state-initialize', False), 89 # This event doesn't happen if device has no users. 90 ('kernel_to_signin_users', 'login-send-user-list', False), 91 ('kernel_to_login', 'login-prompt-visible', True), 92 # Not all boards support ARC. 93 ('kernel_to_android_start', 'android-start', False) 94 ] 95 96 _CPU_FREQ_FILE = ('/sys/devices/system/cpu/cpu0' 97 '/cpufreq/cpuinfo_max_freq') 98 99 _UPTIME_PREFIX = 'uptime-' 100 _DISK_PREFIX = 'disk-' 101 _FW_TIMESTAMPS = 'cbmem-timestamps' 102 103 104 _BOOTSTAT_ARCHIVE_GLOB = '/var/log/metrics/shutdown.[0-9]*' 105 _UPTIME_FILE_GLOB = os.path.join('/tmp', _UPTIME_PREFIX + '*') 106 _DISK_FILE_GLOB = os.path.join('/tmp', _DISK_PREFIX + '*') 107 108 # The name of this file has changed starting with linux-3.19. 109 # Use a glob to snarf up all existing records. 110 _RAMOOPS_FILE_GLOB = "/sys/fs/pstore/console-ramoops*" 111 112 113 def _copy_timestamp_files(self): 114 """Copy raw data files to the test results.""" 115 statlist = (glob.glob(self._UPTIME_FILE_GLOB) + 116 glob.glob(self._DISK_FILE_GLOB)) 117 for fname in statlist: 118 shutil.copy(fname, self.resultsdir) 119 120 def _copy_console_ramoops(self): 121 """Copy console_ramoops from previous reboot.""" 122 # If reboot was misbehaving, looking at ramoops may provide clues. 123 for path in glob.glob(self._RAMOOPS_FILE_GLOB): 124 try: 125 shutil.copy(path, self.resultsdir) 126 break 127 except Exception: 128 pass 129 130 def _store_fw_timestamps(self): 131 """Store detailed firmware timestamps for debugging.""" 132 with open(os.path.join(self.resultsdir, self._FW_TIMESTAMPS), 133 'w') as f: 134 utils.run('cbmem -t', stdout_tee=f) 135 136 def _parse_bootstat(self, filename, fieldnum, required=False): 137 """Read values from a bootstat event file. 138 139 Each line of a bootstat event file represents one occurrence 140 of the event. Each line is a copy of the content of 141 /proc/uptime ("uptime-" files) or /sys/block/<dev>/stat 142 ("disk-" files), captured at the time of the occurrence. 143 For either kind of file, each line is a blank separated list 144 of fields. 145 146 The given event file can contain either uptime or disk data. 147 This function reads all lines (occurrences) in the event 148 file, and returns the value of the given field 149 150 @param filename Filename of the bootstat event. 151 @param fieldnum Which field of the file. 152 @return List of values of `fieldnum` for 153 all occurrences in the file. 154 @raises error.TestFail Raised if the event file is missing, 155 unreadable, or malformed. 156 157 """ 158 try: 159 # crbug.com/1098635: racing with chrome browser 160 # See external/chromium_org/chrome/browser/chromeos/boot_times_loader.cc 161 if required: 162 cnt = 0 163 while cnt < 30: 164 if os.path.exists(filename): 165 break 166 time.sleep(1) 167 cnt += 1 168 169 if cnt : 170 logging.warning("Waited %d seconds for bootstat file: %s", cnt, filename) 171 172 with open(filename) as statfile: 173 values = list(map(lambda l: float(l.split()[fieldnum]), 174 statfile.readlines())) 175 return values 176 except IOError: 177 raise error.TestFail('Failed to read bootstat file "%s"' % 178 filename) 179 180 181 def _parse_uptime(self, eventname, bootstat_dir='/tmp', index=0, required=False): 182 """Return time since boot for a bootstat event. 183 184 @param eventname Name of the bootstat event. 185 @param boostat_dir Directory containing the bootstat 186 files. 187 @param index Index of which occurrence of the event 188 to select. 189 @param required If the parameter is required, wait for it. 190 @return Time since boot for the selected 191 event. 192 193 """ 194 event_file = os.path.join(bootstat_dir, 195 self._UPTIME_PREFIX) + eventname 196 return self._parse_bootstat(event_file, 0)[index] 197 198 199 def _parse_diskstat(self, eventname, bootstat_dir='/tmp', index=0): 200 """Return sectors read since boot for a bootstat event. 201 202 @param eventname Name of the bootstat event. 203 @param boostat_dir Directory containing the bootstat files. 204 @param index Index of which occurrence of the event 205 to select. 206 @return Number of sectors read since boot for 207 the selected event. 208 209 """ 210 event_file = os.path.join(bootstat_dir, 211 self._DISK_PREFIX) + eventname 212 return self._parse_bootstat(event_file, 2)[index] 213 214 215 def _gather_firmware_boot_time(self, results): 216 """Read and report firmware startup time. 217 218 `cbmem -t` reports firmware boot time with format of 219 'Total Time: {comma separated microseconds}'. Read the time 220 and record it in `results` as the keyval 221 seconds_power_on_to_kernel. 222 223 @param results Keyvals dictionary. 224 225 """ 226 227 data = utils.system_output('cbmem -t | grep \'Total Time:\' |' 228 'awk \'{print $NF}\'') 229 firmware_time = round(float(data.replace(',', '')) / (1000 * 1000), 2) 230 231 boot_time = results['seconds_kernel_to_login'] 232 results['seconds_power_on_to_kernel'] = firmware_time 233 results['seconds_power_on_to_login'] = ( 234 round(firmware_time + boot_time, 2)) 235 236 237 def _gather_time_keyvals(self, results): 238 """Read and report boot time keyvals. 239 240 Read "seconds since kernel startup" from the bootstat files 241 for the events named in `_EVENT_KEYVALS`, and store the 242 values as perf keyvals. The following keyvals are recorded: 243 * seconds_kernel_to_startup 244 * seconds_kernel_to_startup_done 245 * seconds_kernel_to_chrome_exec 246 * seconds_kernel_to_chrome_main 247 * seconds_kernel_to_login 248 * seconds_kernel_to_network 249 All of these keyvals are considered mandatory, except 250 for seconds_kernel_to_network. 251 252 @param results Keyvals dictionary. 253 @raises error.TestFail Raised if any mandatory keyval can't 254 be determined. 255 256 """ 257 for keyval_name, event_name, required in self._EVENT_KEYVALS: 258 key = 'seconds_' + keyval_name 259 try: 260 results[key] = self._parse_uptime(event_name, required=required) 261 except error.TestFail: 262 if required: 263 raise; 264 265 # Not all 'uptime-network-*-ready' files necessarily exist; 266 # probably there's only one. We go through a list of 267 # possibilities and pick the earliest one we find. We're not 268 # looking for 3G here, so we're not guaranteed to find any 269 # file. 270 network_ready_events = [ 271 'network-wifi-ready', 272 'network-ethernet-ready' 273 ] 274 network_ready_timestamp = float('inf') 275 for event_name in network_ready_events: 276 metric_name = ('seconds_kernel_to_' + 277 event_name.replace('-', '_')) 278 try: 279 network_time = self._parse_uptime(event_name) 280 results[metric_name] = network_time 281 if network_time < network_ready_timestamp: 282 network_ready_timestamp = network_time 283 results['seconds_kernel_to_network'] = network_time 284 except error.TestFail: 285 pass 286 287 288 def _gather_disk_keyvals(self, results): 289 """Read and report disk read keyvals. 290 291 Read "sectors read since kernel startup" from the bootstat 292 files for the events named in `_EVENT_KEYVALS`, convert the 293 values to "bytes read since boot", and store the values as 294 perf keyvals. The following keyvals are recorded: 295 * rdbytes_kernel_to_startup 296 * rdbytes_kernel_to_startup_done 297 * rdbytes_kernel_to_chrome_exec 298 * rdbytes_kernel_to_chrome_main 299 * rdbytes_kernel_to_login 300 301 Disk statistics are reported in units of 512 byte sectors; 302 we convert the keyvals to bytes so that downstream consumers 303 don't have to ask "How big is a sector?". 304 305 @param results Keyvals dictionary. 306 307 """ 308 # We expect an error when reading disk statistics for the 309 # "chrome-main" event because Chrome (not bootstat) generates 310 # that event, and it doesn't include the disk statistics. 311 # We get around that by ignoring all errors. 312 for keyval_name, event_name, required in self._EVENT_KEYVALS: 313 try: 314 key = 'rdbytes_' + keyval_name 315 results[key] = 512 * self._parse_diskstat(event_name) 316 except Exception: 317 pass 318 319 320 def _calculate_timeval(self, event, t0, t1, t_uptime): 321 """Estimate the absolute time of a time since boot. 322 323 Input values `event` and `t_uptime` are times measured as 324 seconds since boot (for the same boot event, as from 325 /proc/uptime). The input values `t0` and `t1` are two 326 values measured as seconds since the epoch. The three "t" 327 values were sampled in the order `t0`, `t_uptime`, `t1`. 328 329 Estimate the time of `event` measured as seconds since the 330 epoch. Also estimate the worst-case error based on the time 331 elapsed between `t0` and `t1`. 332 333 All values are floats. The precision of `event` and 334 `t_uptime` is expected to be kernel jiffies (i.e. one 335 centisecond). The output result is rounded to the nearest 336 jiffy. 337 338 @param event A time to be converted from "seconds since 339 boot" into "seconds since the epoch". 340 @param t0 A reference time earlier than time `t1`, and 341 measured as "seconds since the epoch". 342 @param t1 A reference time later than time `t0`, and 343 measured as "seconds since the epoch". 344 @param t_uptime A reference time measured as "seconds since 345 boot", in between time `t0` and `t1`. 346 347 @return An estimate of the time of `event` measured 348 as seconds since the epoch, rounded to the 349 nearest jiffy. 350 351 """ 352 # Floating point geeks may argue that these calculations 353 # don't guarantee the promised precision: I don't care; 354 # it's good enough. 355 boot_timeval = round((t0 + t1) / 2, 2) - t_uptime 356 error = (t1 - t0) / 2 357 return boot_timeval + event, error 358 359 360 def _gather_reboot_keyvals(self, results): 361 """Read and report shutdown and reboot times. 362 363 The shutdown process saves all bootstat files in /var/log, 364 plus it saves a timestamp file that can be used to convert 365 "time since boot" into times in UTC. Read the saved files 366 from the most recent shutdown, and use them to calculate 367 the time spent from the start of that shutdown until the 368 completion of the most recent boot. Record these keyvals: 369 * seconds_shutdown_time 370 * seconds_reboot_time 371 * seconds_reboot_error 372 373 @param results Keyvals dictionary. 374 375 """ 376 bootstat_archives = glob.glob(self._BOOTSTAT_ARCHIVE_GLOB) 377 if not bootstat_archives: 378 return 379 bootstat_dir = max(bootstat_archives) 380 boot_id = open("/proc/sys/kernel/random/boot_id", "r").read() 381 didrun_path = os.path.join(bootstat_dir, "bootperf_ran") 382 if not os.path.exists(didrun_path): 383 with open(didrun_path, "w") as didrun: 384 didrun.write(boot_id) 385 elif open(didrun_path, "r").read() != boot_id: 386 logging.warning("Ignoring reboot based on stale shutdown %s", 387 os.path.basename(bootstat_dir)) 388 return 389 timestamp_path = os.path.join(bootstat_dir, 'timestamp') 390 try: 391 with open(timestamp_path) as timestamp: 392 archive_t0 = float(timestamp.readline()) 393 archive_t1 = float(timestamp.readline()) 394 except IOError: 395 raise error.TestFail('Failed to read "%s"' % timestamp_path) 396 archive_uptime = self._parse_uptime('archive', 397 bootstat_dir=bootstat_dir) 398 shutdown_uptime = self._parse_uptime('ui-post-stop', 399 bootstat_dir=bootstat_dir, 400 index=-1) 401 shutdown_timeval, shutdown_error = self._calculate_timeval( 402 shutdown_uptime, archive_t0, archive_t1, archive_uptime) 403 boot_t0 = time.time() 404 with open('/proc/uptime') as uptime_file: 405 uptime = float(uptime_file.readline().split()[0]) 406 boot_t1 = time.time() 407 boot_timeval, boot_error = self._calculate_timeval( 408 results['seconds_kernel_to_login'], 409 boot_t0, boot_t1, uptime) 410 reboot_time = round(boot_timeval - shutdown_timeval, 2) 411 poweron_time = results['seconds_power_on_to_login'] 412 shutdown_time = round(reboot_time - poweron_time, 2) 413 results['seconds_reboot_time'] = reboot_time 414 results['seconds_reboot_error'] = shutdown_error + boot_error 415 results['seconds_shutdown_time'] = shutdown_time 416 417 418 def _calculate_diff(self, results): 419 barriers = ['startup', 'chrome_exec', 'login'] 420 for i in range(len(barriers) - 1): 421 for type in ['seconds', 'rdbytes']: 422 begin = '%s_kernel_to_%s' % (type, barriers[i]) 423 end = '%s_kernel_to_%s' % (type, barriers[i + 1]) 424 if begin in results and end in results: 425 diff_name = '%s_%s_to_%s' % (type, barriers[i], barriers[i + 1]) 426 results[diff_name] = results[end] - results[begin] 427 428 429 def run_once(self): 430 """Gather boot time statistics. 431 432 Every shutdown and boot creates `bootstat` files with 433 summary statistics for time elapsed and disk usage. Gather 434 the values reported for shutdown, boot time and network 435 startup time, and record them as perf keyvals. 436 437 Additionally, gather information about firmware startup time 438 from various sources, and record the times as perf keyvals. 439 440 Finally, copy the raw data files to the results directory 441 for reference. 442 443 """ 444 # `results` is the keyvals dictionary 445 results = {} 446 447 self._gather_time_keyvals(results) 448 self._gather_disk_keyvals(results) 449 self._gather_firmware_boot_time(results) 450 self._gather_reboot_keyvals(results) 451 self._calculate_diff(results) 452 453 self._copy_timestamp_files() 454 self._copy_console_ramoops() 455 self._store_fw_timestamps() 456 457 self.write_perf_keyval(results) 458 459 if utils.system('crossystem mainfw_type?normal', 460 ignore_status=True) != 0: 461 raise error.TestNAError( 462 'Firmware boot times are not accurate in developer mode. ' 463 'Please run this test in normal mode.') 464 if 'EC returned from reboot' in utils.system_output('cbmem -1'): 465 raise error.TestNAError( 466 'Firmware boot times should be measured without an EC reboot. ' 467 'Please warm reboot this system by running the "reboot" userspace command, then rerun the test.' 468 ) 469 # This is looking for the LB_TAG_SERIAL entry in the coreboot table. 470 # 0x0000000f = tag, 0x00000020 = size, 0x0001c200 = 115200 baud. 471 if re.search('\x0f\0\0\0\x20\0\0\0........\0\xc2\x01\0', 472 utils.system_output('cbmem -r 43425442')): 473 raise error.TestNAError( 474 'Firmware boot times should be measured without serial output. ' 475 'Please rerun this test with a production image (image.bin, not image.dev.bin).' 476 ) 477