• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import logging
13import os
14import time
15import traceback
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
21from autotest_lib.client.common_lib import hosts
22from autotest_lib.client.common_lib.cros import retry
23from autotest_lib.client.common_lib.cros.network import ping_runner
24from autotest_lib.server.cros.servo import servo
25from autotest_lib.server.hosts import servo_repair
26from autotest_lib.server.hosts import base_servohost
27
28
# Names of the host attributes in the database that describe the servo
# connected to the DUT: servo host name, servod port, board, model and
# serial.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
# Model is inferred from host labels.
SERVO_MODEL_ATTR = 'servo_model'
SERVO_SERIAL_ATTR = 'servo_serial'
# Attribute keys that get_servo_args_for_host() copies out of the host
# info attributes.  SERVO_MODEL_ATTR is not listed here;
# get_servo_args_for_host() fills it in from `info.model` instead.
SERVO_ATTR_KEYS = (
        SERVO_BOARD_ATTR,
        SERVO_HOST_ATTR,
        SERVO_PORT_ATTR,
        SERVO_SERIAL_ATTR,
)

# Seconds to sleep after stopping / starting the servod process; see
# ServoHost.stop_servod() and ServoHost.start_servod().
SERVOD_TEARDOWN_TIMEOUT = 3
SERVOD_QUICK_STARTUP_TIMEOUT = 20
SERVOD_STARTUP_TIMEOUT = 60

# Shorthand for the global autotest configuration object.
_CONFIG = global_config.global_config
# When true (and the servo host is not localhost) the servod proxy is
# created through rpc_server_tracker.xmlrpc_connect() instead of a direct
# HTTP xmlrpclib.ServerProxy; see ServoHost._create_servod_server_proxy().
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Value of 'drone_installation_directory' in the SCHEDULER config section,
# defaulting to /usr/local/autotest.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# NOTE(review): presumably the prefix of the host label that carries the
# servo state -- it is not used in this file; confirm against callers.
SERVO_STATE_LABEL_PREFIX = 'servo_state'
# Values returned by ServoHost.get_servo_state().
SERVO_STATE_WORKING = 'WORKING'
SERVO_STATE_BROKEN = 'BROKEN'
60
61
class ServoHost(base_servohost.BaseServoHost):
    """Host class for a servo host (e.g. beaglebone, labstation) together
    with the servo instance served on one specific port.

    @type _servo: servo.Servo | None
    """

    # Default servod port: the SERVOD_PORT environment variable if set,
    # otherwise 9999.
    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 60

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    def _init_attributes(self):
        """Reset every servo related attribute of this host to `None`."""
        self._servo_state = None
        self.servo_port = None
        self.servo_board = None
        self.servo_model = None
        self.servo_serial = None
        self._servo = None
        self._servod_server_proxy = None


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_model=None, servo_serial=None, is_in_lab=None,
                    *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on. Defaults
                           to the SERVOD_PORT environment variable if set,
                           otherwise 9999.
        @param servo_board: Board that the servo is connected to.
        @param servo_model: Model that the servo is connected to.
        @param servo_serial: Serial of the servo; handed to servod as
                             `SERIAL=` and to `servo.Servo`.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.
        @param args: Extra positional arguments for the base class.
        @param dargs: Extra keyword arguments for the base class.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           is_in_lab=is_in_lab, *args, **dargs)
        self._init_attributes()
        self.servo_port = int(servo_port)
        self.servo_board = servo_board
        self.servo_model = servo_model
        self.servo_serial = servo_serial

        # Path of the servo host lock file.
        self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                           + self.LOCK_FILE_POSTFIX)
        # File path to declare a reboot request.
        self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
                             + self.REBOOT_FILE_POSTFIX)

        # Lock the servo host if it's an in-lab labstation, to prevent
        # other tasks from rebooting it until the current task completes.
        # We also wait here to make sure the labstation is up, in case it
        # is in the middle of a reboot.
        self._is_locked = False
        if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
            and self.is_labstation()):
            self._lock()

        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())


    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises hosts.AutoservVerifyError if initializing the servo takes
                longer than INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
        # Cache the object before initializing it so that
        # disconnect_servo() still cleans up after a failed initialize.
        self._servo = servo_obj
        timed_out, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timed_out:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, drop the cached servod
        proxy, and set `self._servo` back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            # A proxy created through the tunnel is presumably unusable
            # once the tunnel is disconnected, so forget it and let
            # get_servod_server_proxy() build a fresh one on reconnect.
            self._servod_server_proxy = None
            self._servo = None


    def _create_servod_server_proxy(self):
        """Create a proxy that can be used to communicate with servod server.

        The proxy goes through `rpc_server_tracker.xmlrpc_connect()` when
        ENABLE_SSH_TUNNEL_FOR_SERVO is set and this host is not localhost;
        otherwise it talks HTTP to `hostname:servo_port` directly.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60,
                    request_timeout_seconds=3600)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def get_servod_server_proxy(self):
        """Return a cached proxy if exists; otherwise, create a new one.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        # Single-threaded execution, no race
        if self._servod_server_proxy is None:
            self._servod_server_proxy = self._create_servod_server_proxy()
        return self._servod_server_proxy


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        Sets the servo state to WORKING on success.  On any failure the
        state is set to BROKEN, the servo is disconnected, servod is
        stopped, and the failure is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning verify for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.verify(self, silent)
            self._servo_state = SERVO_STATE_WORKING
            self.record('INFO', None, None,
                        'ServoHost verify set servo_state as WORKING')
        except:
            # The bare except is acceptable here: whatever was raised is
            # re-raised below once the cleanup has been done.
            self._servo_state = SERVO_STATE_BROKEN
            self.record('INFO', None, None,
                        'ServoHost verify set servo_state as BROKEN')
            self.disconnect_servo()
            self.stop_servod()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        Sets the servo state to WORKING on success.  On any failure the
        state is set to BROKEN, the servo is disconnected, servod is
        stopped, and the failure is re-raised.

        @param silent   If true, suppress logging in `status.log`.
        """
        message = 'Beginning repair for servo host %s port %s serial %s'
        message %= (self.hostname, self.servo_port, self.servo_serial)
        self.record('INFO', None, None, message)
        try:
            self._repair_strategy.repair(self, silent)
            self._servo_state = SERVO_STATE_WORKING
            self.record('INFO', None, None,
                        'ServoHost repair set servo_state as WORKING')
            # If target is a labstation then try to withdraw any existing
            # reboot request created by this servo because it passed repair.
            if self.is_labstation():
                self.withdraw_reboot_request()
        except:
            # The bare except is acceptable here: whatever was raised is
            # re-raised below once the cleanup has been done.
            self._servo_state = SERVO_STATE_BROKEN
            self.record('INFO', None, None,
                        'ServoHost repair set servo_state as BROKEN')
            self.disconnect_servo()
            self.stop_servod()
            raise


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None when no servo is connected.
        @rtype: autotest_lib.server.cros.servo.servo.Servo
        """
        return self._servo


    def request_reboot(self):
        """Request a servohost reboot, for when it is safe to do so, by
        touching the reboot flag file.
        """
        logging.debug('Request to reboot servohost %s has been created by '
                      'servo with port # %s', self.hostname, self.servo_port)
        self.run('touch %s' % self._reboot_file, ignore_status=True)


    def withdraw_reboot_request(self):
        """Withdraw a servohost reboot request, if one exists, by removing
        the reboot flag file.
        """
        logging.debug('Withdrawing request to reboot servohost %s that created'
                      ' by servo with port # %s if exists.',
                      self.hostname, self.servo_port)
        self.run('rm -f %s' % self._reboot_file, ignore_status=True)


    def start_servod(self, quick_startup=False):
        """Start the servod process on servohost.

        Does nothing when the servohost is localhost.  Otherwise runs
        `start servod` and then sleeps to give servod time to come up.

        @param quick_startup: If true, sleep SERVOD_QUICK_STARTUP_TIMEOUT
                              seconds after starting servod instead of
                              SERVOD_STARTUP_TIMEOUT.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping start servod.")
            return

        cmd = 'start servod'
        if self.servo_board:
            cmd += ' BOARD=%s' % self.servo_board
            # MODEL is only passed along together with BOARD.
            if self.servo_model:
                cmd += ' MODEL=%s' % self.servo_model
        else:
            logging.warning('Board for DUT is unknown; starting servod'
                            ' assuming a pre-configured board.')

        cmd += ' PORT=%d' % self.servo_port
        if self.servo_serial:
            cmd += ' SERIAL=%s' % self.servo_serial
        self.run(cmd, timeout=60)

        # There's a lag between when `start servod` completes and when
        # the _ServodConnectionVerifier trigger can actually succeed.
        # The call to time.sleep() below gives time to make sure that
        # the trigger won't fail after we return.

        # Normally servod on servo_v3 and labstation takes ~10 seconds to
        # get ready, but in the rare case that all servos on a labstation
        # are in heavy use it may take ~30 seconds.  The timeouts are
        # double these values: the quick one is used the first time a
        # servohost is initialized, the standard one during repair.
        if quick_startup:
            timeout = SERVOD_QUICK_STARTUP_TIMEOUT
        else:
            timeout = SERVOD_STARTUP_TIMEOUT
        logging.debug('Wait %s seconds for servod process fully up.', timeout)
        time.sleep(timeout)


    def stop_servod(self):
        """Stop the servod process on servohost.

        Does nothing when the servohost is localhost.  Otherwise runs
        `stop servod` for this port, ignoring its exit status, and then
        sleeps SERVOD_TEARDOWN_TIMEOUT seconds.
        """
        # Skip if running on the localhost.(crbug.com/1038168)
        if self.is_localhost():
            logging.debug("Servohost is a localhost, skipping stop servod.")
            return

        logging.debug('Stopping servod on port %s', self.servo_port)
        self.run('stop servod PORT=%d' % self.servo_port,
                 timeout=60, ignore_status=True)
        logging.debug('Wait %s seconds for servod process fully teardown.',
                      SERVOD_TEARDOWN_TIMEOUT)
        time.sleep(SERVOD_TEARDOWN_TIMEOUT)


    def restart_servod(self, quick_startup=False):
        """Restart the servod process on servohost.

        @param quick_startup: Passed through to start_servod().
        """
        self.stop_servod()
        self.start_servod(quick_startup)


    def _lock(self):
        """Lock servohost by touching the lock file."""
        logging.debug('Locking servohost %s by touching %s file',
                      self.hostname, self._lock_file)
        self.run('touch %s' % self._lock_file, ignore_status=True)
        self._is_locked = True


    def _unlock(self):
        """Unlock servohost by removing the lock file."""
        logging.debug('Unlocking servohost by removing %s file',
                      self._lock_file)
        self.run('rm %s' % self._lock_file, ignore_status=True)
        self._is_locked = False


    def close(self):
        """Close the associated servo and the host object.

        The cleanup steps run in this order: close the servo, remove the
        lock file, stop servod, close the base host.  Each later step
        still runs when an earlier one raises, so a failure while closing
        the servo cannot leave the servohost locked or servod running.
        Only the failures handled below are swallowed; any other
        exception still propagates once the remaining steps have run.
        """
        try:
            if self._servo:
                # In some cases when we run as lab-tools, the job object
                # is None.
                if self.job and not self._servo.uart_logs_dir:
                    self._servo.uart_logs_dir = self.job.resultdir
                self._servo.close()
        finally:
            try:
                if self._is_locked:
                    # Remove the lock if the servohost has been locked.
                    try:
                        self._unlock()
                    except error.AutoservSSHTimeout:
                        logging.error('Unlock servohost failed due to ssh timeout.'
                                      ' It may caused by servohost went down during'
                                      ' the task.')
            finally:
                try:
                    # Always stop servod after a task, to minimize the
                    # chance of a bad servod process interfering with
                    # other servods. (see crbug.com/1028665)
                    try:
                        self.stop_servod()
                    except error.AutoservRunError as e:
                        logging.info("Failed to stop servod due to:\n%s\n"
                                     "This error is forgived.", str(e))
                finally:
                    super(ServoHost, self).close()


    def get_servo_state(self):
        """Return the servo state set by the last verify() or repair().

        @return SERVO_STATE_WORKING or SERVO_STATE_BROKEN; a servo that
                was never verified or repaired is reported as BROKEN.
        """
        if self._servo_state is None:
            return SERVO_STATE_BROKEN
        return self._servo_state
381
382
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that belongs to a DUT hostname.

    '-servo' is appended to the first dot-separated component of the
    name; the rest of the name is kept as is.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, separator, domain = dut_hostname.partition('.')
    return short_name + '-servo' + separator + domain
394
395
def servo_host_is_up(servo_hostname):
    """Report whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one of three pings was answered, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends within a couple
    # of seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    runner = ping_runner.PingRunner()
    result = runner.ping(config)
    return result.received > 0
413
414
415def _map_afe_board_to_servo_board(afe_board):
416    """Map a board we get from the AFE to a servo appropriate value.
417
418    Many boards are identical to other boards for servo's purposes.
419    This function makes that mapping.
420
421    @param afe_board string board name received from AFE.
422    @return board we expect servo to have.
423
424    """
425    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
426    BOARD_MAP = {'gizmo': 'panther'}
427    mapped_board = afe_board
428    if afe_board in BOARD_MAP:
429        mapped_board = BOARD_MAP[afe_board]
430    else:
431        for suffix in KNOWN_SUFFIXES:
432            if afe_board.endswith(suffix):
433                mapped_board = afe_board[0:-len(suffix)]
434                break
435    if mapped_board != afe_board:
436        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
437    return mapped_board
438
439
def get_servo_args_for_host(dut_host):
    """Collect the servo parameters recorded for a DUT.

    The servo attributes listed in SERVO_ATTR_KEYS are copied from the
    DUT's host info; the port is converted to an int, and the board
    (mapped to its servo name) and model are added from the host info
    when they are set.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return `servo_args` dict with host and an optional port, or None
            when no servo host is recorded or the recorded port is not
            an integer.
    """
    host_info = dut_host.host_info_store.get()
    servo_args = {}
    for key, value in host_info.attributes.iteritems():
        if key in SERVO_ATTR_KEYS:
            servo_args[key] = value

    if SERVO_PORT_ATTR in servo_args:
        raw_port = servo_args[SERVO_PORT_ATTR]
        try:
            servo_args[SERVO_PORT_ATTR] = int(raw_port)
        except ValueError:
            logging.error('servo port is not an int: %s', raw_port)
            # Drop the servo host so that the final check below returns
            # None; we don't want to use an invalid port.
            servo_args.pop(SERVO_HOST_ATTR, None)

    if host_info.board:
        servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                host_info.board)
    if host_info.model:
        servo_args[SERVO_MODEL_ATTR] = host_info.model

    if SERVO_HOST_ATTR not in servo_args:
        return None
    return servo_args
465
466
def _tweak_args_for_ssp_moblab(servo_args):
    """Redirect a loopback servo host to the configured container IP.

    When the servo host in `servo_args` is 'localhost' or '127.0.0.1',
    it is replaced in place by the 'host_container_ip' value of the
    'SSP' config section (None if that value is not configured).

    @param servo_args  Servo parameter dict; must contain the servo host.
    """
    if servo_args[SERVO_HOST_ATTR] not in ('localhost', '127.0.0.1'):
        return
    container_ip = _CONFIG.get_config_value(
            'SSP', 'host_container_ip', type=str, default=None)
    servo_args[SERVO_HOST_ATTR] = container_ip
471
472
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False, dut_host_info=None):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    Before the servo host is checked, servod is restarted on it with the
    quick startup timeout; failures of that restart are logged and
    otherwise ignored.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `verify()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.
    @param dut_host_info  If not None, passed to `set_dut_host_info()`
                          on the new servo host; failures of that call
                          are logged and ignored.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_required = servo_args is not None

    # Servo attributes recorded for the DUT win over the caller's args.
    if dut is not None and (try_lab_servo or servo_required):
        host_servo_args = get_servo_args_for_host(dut)
        if host_servo_args is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(host_servo_args)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    host_servo_args,
            )
            servo_args = host_servo_args

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        return None
    if SERVO_HOST_ATTR not in servo_args:
        logging.debug('%s attribute missing from servo_args: %s',
                      SERVO_HOST_ATTR, servo_args)
        return None

    # Only the plain verify path bothers to ping the servo host first.
    ping_first = not (servo_required or try_servo_repair)
    if ping_first and not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
        logging.debug('ServoHost is not up.')
        return None

    servo_host = ServoHost(**servo_args)
    try:
        servo_host.restart_servod(quick_startup=True)
    except error.AutoservSSHTimeout:
        logging.warning("Restart servod failed due ssh connection "
                        "to servohost timed out. This error is forgiven"
                        " here, we will retry in servo repair process.")
    except error.AutoservRunError as e:
        logging.warning("Restart servod failed due to:\n%s\n"
                        "This error is forgiven here, we will retry"
                        " in servo repair process.", str(e))

    # TODO(gregorynisbet): Clean all of this up.
    logging.debug('create_servo_host: attempt to set info store on '
                  'servo host')
    if dut_host_info is None:
        logging.debug('create_servo_host: dut_host_info is '
                      'None, skipping')
    else:
        try:
            servo_host.set_dut_host_info(dut_host_info)
            logging.debug('create_servo_host: successfully set info '
                          'store')
        except Exception:
            logging.error("create_servo_host: (%s)", traceback.format_exc())

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_required:
        # An explicit servo dependency: let repair failures propagate.
        servo_host.repair(silent=True)
        return servo_host

    if try_servo_repair:
        check = servo_host.repair
        failure_message = 'servo repair failed for %s'
    else:
        check = servo_host.verify
        failure_message = 'servo verify failed for %s'
    try:
        check()
    except Exception:
        logging.exception(failure_message, servo_host.hostname)
    return servo_host
598