# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import math
import threading

import common
from autotest_lib.client.common_lib import env
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.frontend.afe.json_rpc import proxy
from autotest_lib.server import frontend
try:
    from chromite.lib import retry_util
    from chromite.lib import timeout_util
except ImportError:
    # chromite is optional at import time; RetryingAFE.run() raises
    # ImportError later if it is actually needed.
    logging.warning('Unable to import chromite.')
    retry_util = None
    timeout_util = None

try:
    from chromite.lib import metrics
except ImportError:
    # Fall back to the mock so metrics calls below do not need guarding.
    logging.warning('Unable to import metrics from chromite.')
    metrics = utils.metrics_mock


def convert_timeout_to_retry(backoff, timeout_min, delay_sec):
    """Compute the number of retry attempts for use with chromite.retry_util.

    @param backoff: The exponential backoff factor. A factor of 1 means a
                    constant delay between attempts.
    @param timeout_min: The maximum amount of time (in minutes) to sleep.
    @param delay_sec: The amount to sleep (in seconds) between each attempt.
                      Must be non-zero.

    @return: The number of retry attempts in the case of exponential backoff.
    """
    # Force true division so the ratio is not truncated when both operands
    # are ints and '/' is integer division.
    sleep_ratio = float(timeout_min * 60) / delay_sec

    if backoff == 1:
        # The geometric series below degenerates to max_retry * sleep, and
        # log(backoff) is 0, so handle the constant-delay case directly.
        return int(math.ceil(sleep_ratio))

    # Estimate the max_retry in the case of exponential backoff:
    #   => total_sleep = sleep*sum(r=0..max_retry-1, backoff^r)
    #   => total_sleep = sleep( (1-backoff^max_retry) / (1-backoff) )
    #   => max_retry*ln(backoff) = ln(1-(total_sleep/sleep)*(1-backoff))
    #   => max_retry = ln(1-(total_sleep/sleep)*(1-backoff))/ln(backoff)
    numerator = math.log10(1 - sleep_ratio * (1 - backoff))
    denominator = math.log10(backoff)
    return int(math.ceil(numerator / denominator))


class RetryingAFE(frontend.AFE):
    """Wrapper around frontend.AFE that retries all RPCs.

    Timeout for retries and delay between retries are configurable.
    """
    def __init__(self, timeout_min=30, delay_sec=10, **dargs):
        """Constructor

        @param timeout_min: timeout in minutes until giving up.
        @param delay_sec: pre-jittered delay between retries in seconds.
        @param dargs: remaining keyword arguments, passed to frontend.AFE.
        """
        self.timeout_min = timeout_min
        self.delay_sec = delay_sec
        super(RetryingAFE, self).__init__(**dargs)


    def set_timeout(self, timeout_min):
        """Set timeout minutes for the AFE server.

        @param timeout_min: The timeout minutes for AFE server.
        """
        self.timeout_min = timeout_min


    def run(self, call, **dargs):
        """Method for running RPC call.

        In the main thread the call is retried through
        retry_util.GenericRetry with exponential backoff, wrapped in
        timeout_util.Timeout unless env.IN_MOD_WSGI is set. In any other
        thread the call is retried through the retry.retry decorator.

        @param call: A string RPC call.
        @param dargs: the parameters of the RPC call.

        @return: Whatever the underlying frontend.AFE.run call returns.

        @raises ImportError: if chromite could not be imported.
        @raises timeout_util.TimeoutError: re-raised after the
                retry_timeout counter has been incremented.
        """
        if retry_util is None:
            raise ImportError('Unable to import chromite. Please consider '
                              'running build_externals to build site packages.')
        # exc_retry: We retry if this exception is raised.
        # raiselist: Exceptions that we raise immediately if caught.
        exc_retry = Exception
        raiselist = (ImportError, error.RPCException, proxy.JSONRPCException,
                     timeout_util.TimeoutError, error.ControlFileNotFound)
        backoff = 2
        max_retry = convert_timeout_to_retry(backoff, self.timeout_min,
                                             self.delay_sec)

        def _run(self, call, **dargs):
            """Issue the RPC once through the parent class."""
            return super(RetryingAFE, self).run(call, **dargs)

        def handler(exc):
            """Check if exc is an exc_retry or if it's in raiselist.

            @param exc: An exception.

            @return: True if exc is an exc_retry and is not
                     in raiselist. False otherwise.
            """
            is_exc_to_check = isinstance(exc, exc_retry)
            is_in_raiselist = isinstance(exc, raiselist)
            return is_exc_to_check and not is_in_raiselist

        # If the call is not in main thread, signal can't be used to abort the
        # call. In that case, use a basic retry which does not enforce timeout
        # if the process hangs.
        @retry.retry(Exception, timeout_min=self.timeout_min,
                     delay_sec=self.delay_sec,
                     raiselist=[ImportError, error.RPCException,
                                proxy.ValidationError])
        def _run_in_child_thread(self, call, **dargs):
            """Issue the RPC through the parent class, retried by retry.retry."""
            return super(RetryingAFE, self).run(call, **dargs)

        if isinstance(threading.current_thread(), threading._MainThread):
            # Set the keyword argument for GenericRetry
            dargs['sleep'] = self.delay_sec
            dargs['backoff_factor'] = backoff
            # timeout_util.Timeout fundamentally relies on sigalrm, and doesn't
            # work at all in wsgi environment (just emits logs spam). So, don't
            # use it in wsgi.
            try:
                if env.IN_MOD_WSGI:
                    return retry_util.GenericRetry(handler, max_retry, _run,
                                                   self, call, **dargs)
                with timeout_util.Timeout(self.timeout_min * 60):
                    return retry_util.GenericRetry(handler, max_retry, _run,
                                                   self, call, **dargs)
            except timeout_util.TimeoutError:
                # Record the timeout before propagating it to the caller.
                c = metrics.Counter(
                        'chromeos/autotest/retrying_afe/retry_timeout')
                # Reserve field job_details for future use.
                f = {'destination_server': self.server.split(':')[0],
                     'call': call,
                     'job_details': ''}
                c.increment(fields=f)
                raise
        else:
            return _run_in_child_thread(self, call, **dargs)


class RetryingTKO(frontend.TKO):
    """Wrapper around frontend.TKO that retries all RPCs.

    Timeout for retries and delay between retries are configurable.
    """
    def __init__(self, timeout_min=30, delay_sec=10, **dargs):
        """Constructor

        @param timeout_min: timeout in minutes until giving up.
        @param delay_sec: pre-jittered delay between retries in seconds.
        @param dargs: remaining keyword arguments, passed to frontend.TKO.
        """
        self.timeout_min = timeout_min
        self.delay_sec = delay_sec
        super(RetryingTKO, self).__init__(**dargs)


    def run(self, call, **dargs):
        """Method for running RPC call.

        The call is retried through the retry.retry decorator using this
        instance's timeout_min and delay_sec.

        @param call: A string RPC call.
        @param dargs: the parameters of the RPC call.

        @return: Whatever the underlying frontend.TKO.run call returns.
        """
        @retry.retry(Exception, timeout_min=self.timeout_min,
                     delay_sec=self.delay_sec,
                     raiselist=[ImportError, error.RPCException,
                                proxy.ValidationError])
        def _run(self, call, **dargs):
            """Issue the RPC once through the parent class."""
            return super(RetryingTKO, self).run(call, **dargs)
        return _run(self, call, **dargs)