# -*- coding: utf-8 -*-
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Functions for implementing timeouts."""

from __future__ import print_function

import contextlib
import datetime
import functools
import signal
import threading
import time

from autotest_lib.utils.frozen_chromite.lib import cros_logging as logging


class TimeoutError(Exception):  # pylint: disable=redefined-builtin
  """Raised when code within Timeout has been running for too long."""


def Timedelta(num, zero_ok=False):
  """Normalize |num| (in seconds) into a datetime.timedelta.

  Args:
    num: A number of seconds, or an existing datetime.timedelta object.
    zero_ok: If True a zero duration is accepted; otherwise the duration must
      be strictly positive.

  Returns:
    The datetime.timedelta equivalent of |num|.

  Raises:
    ValueError: If |num| is negative, or is zero while |zero_ok| is False.
  """
  if not isinstance(num, datetime.timedelta):
    num = datetime.timedelta(seconds=num)
  if zero_ok:
    if num.total_seconds() < 0:
      raise ValueError('timing must be >= 0, not %s' % (num,))
  else:
    if num.total_seconds() <= 0:
      raise ValueError('timing must be greater than 0, not %s' % (num,))
  return num


def _ScheduleTimer(seconds, interval=0):
  """Schedules the timer to raise SIGALRM.

  If |seconds| is less than the minimum resolution, it is rounded up to the
  resolution.
  Note: if |seconds| is very short, the signal can be delivered almost
  immediately, so the handler can be called even within this stack.

  Args:
    seconds: How long to wait before sending SIGALRM, in seconds.
    interval: (Optional) interval schedule for the timer.
  """
  # Min resolution of itimer.  See man setitimer(2) for details.
  MIN_SECONDS = 0.000001
  signal.setitimer(signal.ITIMER_REAL, max(seconds, MIN_SECONDS), interval)


def _CancelTimer():
  """Cancels the currently scheduled SIGALRM timer.

  Returns:
    Previous timer, which is a pair of scheduled timeout and interval.
  """
  return signal.setitimer(signal.ITIMER_REAL, 0)


@contextlib.contextmanager
def Timeout(max_run_time,
            error_message='Timeout occurred- waited %(time)s seconds.',
            reason_message=None):
  """ContextManager that alarms if code is run for too long.

  Timeout can run nested and raises a TimeoutError if the timeout
  is reached.  Timeout can also nest underneath FatalTimeout.

  Args:
    max_run_time: How long to wait before sending SIGALRM.  May be a number
      (in seconds, can be fractional) or a datetime.timedelta object.
    error_message: Optional string to wrap in the TimeoutError exception on
      timeout.  It is a %-style template that may reference %(time)s.  If not
      provided, the default template will be used.
    reason_message: Optional string to be appended to the TimeoutError
      error_message string.  Provide a custom message here if you want to have
      a purpose-specific message without overriding the default template in
      |error_message|.  It is appended verbatim (it is not %-expanded), so it
      may safely contain literal '%' characters.

  Raises:
    ValueError: If |max_run_time| is not greater than zero.
    TimeoutError: Raised from the SIGALRM handler inside the managed code once
      the timer expires.
  """
  max_run_time = Timedelta(max_run_time).total_seconds()

  # pylint: disable=unused-argument
  def kill_us(sig_num, frame):
    # Expand only the template.  The reason is appended afterwards so that a
    # stray '%' in it cannot make the formatting fail inside the signal
    # handler and mask the TimeoutError.
    message = error_message % {'time': max_run_time}
    if reason_message:
      message += reason_message
    raise TimeoutError(message)

  # Remember any timer that was already pending so it can be re-armed on exit.
  previous_time = time.time()
  previous_timeout, previous_interval = _CancelTimer()
  original_handler = signal.signal(signal.SIGALRM, kill_us)

  try:
    # Signal the min in case the leftover time was smaller than this timeout.
    # This needs to be called in try block, otherwise, finally may not be
    # called in case that the timeout duration is too short.
    _ScheduleTimer(min(previous_timeout or float('inf'), max_run_time))
    yield
  finally:
    # Cancel the alarm request and restore the original handler.
    _CancelTimer()
    signal.signal(signal.SIGALRM, original_handler)

    # Ensure the previous handler will fire if it was meant to.
    if previous_timeout:
      remaining_timeout = previous_timeout - (time.time() - previous_time)
      # It is ok to pass negative remaining_timeout.  Please see also comments
      # of _ScheduleTimer for more details.
      _ScheduleTimer(remaining_timeout, previous_interval)


@contextlib.contextmanager
def FatalTimeout(max_run_time, display_message=None):
  """ContextManager that exits the program if code is run for too long.

  This implementation is fairly simple, thus multiple timeouts
  cannot be active at the same time.

  Additionally, if the timeout has elapsed, it'll trigger a SystemExit
  exception within the invoking code, ultimately propagating that past
  itself.  If the underlying code tries to suppress the SystemExit, once
  a minute it'll retrigger SystemExit until control is returned to this
  manager.

  Args:
    max_run_time: How long to wait.  May be a number (in seconds, can be
      fractional) or a datetime.timedelta object.
    display_message: Optional string message to be included in timeout
      error message, if the timeout occurs.

  Raises:
    ValueError: If |max_run_time| is not greater than zero.
    Exception: If another ITIMER_REAL timer is already pending on entry.
    SystemExit: Raised from the SIGALRM handler once the timer expires.
  """
  max_run_time = Timedelta(max_run_time).total_seconds()

  # pylint: disable=unused-argument
  def kill_us(sig_num, frame):
    # While this SystemExit *should* crash its way back up the
    # stack to our exit handler, we do have live/production code
    # that uses blanket except statements which could suppress this.
    # As such, keep scheduling alarms until our exit handler runs.
    # Note that there is a potential conflict via this code, and
    # run's kill_timeout; thus we set the alarming interval
    # fairly high.
    _ScheduleTimer(60)

    # The cbuildbot stage that gets aborted by this timeout should be treated
    # as failed by buildbot.
    error_message = ('Timeout occurred- waited %i seconds, failing.' %
                     max_run_time)
    if display_message:
      error_message += ' Timeout reason: %s' % display_message
    logging.PrintBuildbotStepFailure()
    logging.error(error_message)
    raise SystemExit(error_message)

  # A non-zero remaining time means somebody else already owns the timer.
  if signal.getitimer(signal.ITIMER_REAL)[0]:
    raise Exception('FatalTimeout cannot be used in parallel to other alarm '
                    'handling code; failing')

  original_handler = signal.signal(signal.SIGALRM, kill_us)
  try:
    _ScheduleTimer(max_run_time)
    yield
  finally:
    # Cancel the alarm request and restore the original handler.
    _CancelTimer()
    signal.signal(signal.SIGALRM, original_handler)


def TimeoutDecorator(max_time):
  """Decorator used to ensure a func is interrupted if it's running too long.

  Args:
    max_time: Timeout passed to Timeout() around every call of the decorated
      function.  May be a number (in seconds) or a datetime.timedelta object.

  Returns:
    A decorator that wraps a function in a Timeout(max_time) context.
  """
  # Save off the original versions of time.time, signal.signal,
  # signal.setitimer and signal.getitimer (plus the SIGALRM / ITIMER_REAL
  # constants), in case they get mocked out later.  We want to ensure that
  # tests don't accidentally mock out the functions used by Timeout.
  def _Save():
    return (time.time, signal.signal, signal.setitimer, signal.getitimer,
            signal.SIGALRM, signal.ITIMER_REAL)

  def _Restore(values):
    (time.time, signal.signal, signal.setitimer, signal.getitimer,
     signal.SIGALRM, signal.ITIMER_REAL) = values

  builtins = _Save()

  def NestedTimeoutDecorator(func):
    @functools.wraps(func)
    def TimeoutWrapper(*args, **kwargs):
      # |new| holds whatever (possibly mocked) versions are installed at call
      # time.  Timeout's setup and teardown run against the saved originals,
      # while |func| itself runs against |new|.
      new = _Save()
      try:
        _Restore(builtins)
        with Timeout(max_time):
          _Restore(new)
          try:
            return func(*args, **kwargs)
          finally:
            _Restore(builtins)
      finally:
        _Restore(new)

    return TimeoutWrapper

  return NestedTimeoutDecorator


def WaitForReturnTrue(*args, **kwargs):
  """Periodically run a function, waiting in between runs.

  Continues to run until the function returns True.

  Args:
    See WaitForReturnValue([True], ...)

  Returns:
    The accepted value most recently returned by the polled function.

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  return WaitForReturnValue([True], *args, **kwargs)


def WaitForReturnValue(values, *args, **kwargs):
  """Periodically run a function, waiting in between runs.

  Continues to run until the function return value is in the list
  of accepted |values|.  See WaitForSuccess for more details.

  Args:
    values: A list or set of acceptable return values.
    *args, **kwargs: See WaitForSuccess for remaining arguments.

  Returns:
    The value most recently returned by |func|.

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  def _Retry(return_value):
    return return_value not in values

  return WaitForSuccess(_Retry, *args, **kwargs)


def WaitForSuccess(retry_check, func, timeout, period=1, side_effect_func=None,
                   func_args=None, func_kwargs=None, fallback_timeout=10):
  """Periodically run a function, waiting in between runs.

  Continues to run given function until return value is accepted by retry check.

  To retry based on raised exceptions see GenericRetry in retry_util.

  Args:
    retry_check: A functor that will be passed the return value of |func| as
      the only argument.  If |func| should be retried |retry_check| should
      return True.
    func: The function to run to test for a value.
    timeout: The maximum amount of time to wait.  May be a number (in seconds)
      or a datetime.timedelta object.
    period: How long between calls to |func|.  May be a number (in seconds) or
      a datetime.timedelta object.
    side_effect_func: Optional function to be called between polls of func,
      typically to output logging messages.  The remaining time will be passed
      as a datetime.timedelta object.
    func_args: Optional list of positional arguments to be passed to |func|.
    func_kwargs: Optional dictionary of keyword arguments to be passed to
      |func|.
    fallback_timeout: We set a secondary timeout based on sigalarm this many
      seconds after the initial timeout.  This should NOT be
      considered robust, but can allow timeouts inside blocking
      methods.

  Returns:
    The value most recently returned by |func| that was not flagged for retry.

  Raises:
    TimeoutError when the timeout is exceeded.
  """
  timeout = Timedelta(timeout, zero_ok=True)
  period = Timedelta(period, zero_ok=True)
  fallback_timeout = Timedelta(fallback_timeout)
  func_args = func_args or []
  func_kwargs = func_kwargs or {}

  end = datetime.datetime.now() + timeout

  # pylint: disable=protected-access
  # It is used to get the main thread '_MainThread'.  Without python 3.4, there
  # may be no perfect solutions.  See this discussion for details:
  # http://stackoverflow.com/questions/23206787.
  is_main_thread = isinstance(threading.current_thread(),
                              threading._MainThread)
  # pylint: enable=protected-access

  def retry():
    while True:
      # Guarantee we always run at least once.
      value = func(*func_args, **func_kwargs)
      if not retry_check(value):
        return value

      # Run the user's callback func if available.
      if side_effect_func:
        delta = end - datetime.datetime.now()
        if delta.total_seconds() < 0:
          delta = datetime.timedelta(seconds=0)
        side_effect_func(delta)

      # If we're just going to sleep past the timeout period, abort now.
      delta = end - datetime.datetime.now()
      if delta <= period:
        raise TimeoutError('Timed out after %s' % timeout)

      time.sleep(period.total_seconds())

  if not is_main_thread:
    # Warning: the function here is not working in the main thread.  Since
    # signal only works in main thread, this function may run longer than
    # timeout or even hang.
    return retry()
  else:
    # Use a sigalarm after an extra delay, in case a function we call is
    # blocking for some reason.  This should NOT be considered reliable.
    with Timeout(timeout + fallback_timeout):
      return retry()