• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import xmlrpclib
16
17from autotest_lib.client.bin import utils
18from autotest_lib.client.common_lib import control_data
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
21from autotest_lib.client.common_lib import host_states
22from autotest_lib.client.common_lib import hosts
23from autotest_lib.client.common_lib import lsbrelease_utils
24from autotest_lib.client.common_lib.cros import autoupdater
25from autotest_lib.client.common_lib.cros import dev_server
26from autotest_lib.client.common_lib.cros import retry
27from autotest_lib.client.common_lib.cros.network import ping_runner
28from autotest_lib.client.cros import constants as client_constants
29from autotest_lib.server import afe_utils
30from autotest_lib.server import site_utils as server_site_utils
31from autotest_lib.server.cros import dnsname_mangler
32from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
33from autotest_lib.server.cros.dynamic_suite import control_file_getter
34from autotest_lib.server.cros.servo import servo
35from autotest_lib.server.hosts import servo_repair
36from autotest_lib.server.hosts import ssh_host
37from autotest_lib.site_utils.rpm_control_system import rpm_client
38
39try:
40    from chromite.lib import metrics
41except ImportError:
42    metrics = utils.metrics_mock
43
44
# Names of the DUT host attributes in the database that describe the servo
# connected to that DUT: the servo host, the servod port, the servo board,
# and the servo serial number.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config

# Whether servod should be reached through an ssh tunnel (off by default).
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo',
        type=bool, default=False)

# Directory searched for control files when scheduling reboot jobs.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to reboot a servo host shared by several DUTs.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
62
63class ServoHost(ssh_host.SSHHost):
64    """Host class for a host that controls a servo, e.g. beaglebone."""
65
66    DEFAULT_PORT = 9999
67
68    # Timeout for initializing servo signals.
69    INITIALIZE_SERVO_TIMEOUT_SECS = 60
70
71    # Ready test function
72    SERVO_READY_METHOD = 'get_version'
73
74    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
75
76
77    def _initialize(self, servo_host='localhost',
78                    servo_port=DEFAULT_PORT, servo_board=None,
79                    servo_serial=None, is_in_lab=None, *args, **dargs):
80        """Initialize a ServoHost instance.
81
82        A ServoHost instance represents a host that controls a servo.
83
84        @param servo_host: Name of the host where the servod process
85                           is running.
86        @param servo_port: Port the servod process is listening on.
87        @param servo_board: Board that the servo is connected to.
88        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
89                          to None, for which utils.host_is_in_lab_zone will be
90                          called to check if the servo host is in Cros lab.
91
92        """
93        super(ServoHost, self)._initialize(hostname=servo_host,
94                                           *args, **dargs)
95        self.servo_port = servo_port
96        self.servo_board = servo_board
97        self.servo_serial = servo_serial
98        self._servo = None
99        self._repair_strategy = (
100                servo_repair.create_servo_repair_strategy())
101        self._is_localhost = (self.hostname == 'localhost')
102        if self._is_localhost:
103            self._is_in_lab = False
104        elif is_in_lab is None:
105            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
106        else:
107            self._is_in_lab = is_in_lab
108
109        # Commands on the servo host must be run by the superuser.
110        # Our account on a remote host is root, but if our target is
111        # localhost then we might be running unprivileged.  If so,
112        # `sudo` will have to be added to the commands.
113        if self._is_localhost:
114            self._sudo_required = utils.system_output('id -u') != '0'
115        else:
116            self._sudo_required = False
117
118
119    def connect_servo(self):
120        """Establish a connection to the servod server on this host.
121
122        Initializes `self._servo` and then verifies that all network
123        connections are working.  This will create an ssh tunnel if
124        it's required.
125
126        As a side effect of testing the connection, all signals on the
127        target servo are reset to default values, and the USB stick is
128        set to the neutral (off) position.
129        """
130        servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
131        timeout, _ = retry.timeout(
132                servo_obj.initialize_dut,
133                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
134        if timeout:
135            raise hosts.AutoservVerifyError(
136                    'Servo initialize timed out.')
137        self._servo = servo_obj
138
139
140    def disconnect_servo(self):
141        """Disconnect our servo if it exists.
142
143        If we've previously successfully connected to our servo,
144        disconnect any established ssh tunnel, and set `self._servo`
145        back to `None`.
146        """
147        if self._servo:
148            # N.B. This call is safe even without a tunnel:
149            # rpc_server_tracker.disconnect() silently ignores
150            # unknown ports.
151            self.rpc_server_tracker.disconnect(self.servo_port)
152            self._servo = None
153
154
155    def is_in_lab(self):
156        """Check whether the servo host is a lab device.
157
158        @returns: True if the servo host is in Cros Lab, otherwise False.
159
160        """
161        return self._is_in_lab
162
163
164    def is_localhost(self):
165        """Checks whether the servo host points to localhost.
166
167        @returns: True if it points to localhost, otherwise False.
168
169        """
170        return self._is_localhost
171
172
173    def get_servod_server_proxy(self):
174        """Return a proxy that can be used to communicate with servod server.
175
176        @returns: An xmlrpclib.ServerProxy that is connected to the servod
177                  server on the host.
178        """
179        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
180            return self.rpc_server_tracker.xmlrpc_connect(
181                    None, self.servo_port,
182                    ready_test_name=self.SERVO_READY_METHOD,
183                    timeout_seconds=60)
184        else:
185            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
186            return xmlrpclib.ServerProxy(remote)
187
188
189    def is_cros_host(self):
190        """Check if a servo host is running chromeos.
191
192        @return: True if the servo host is running chromeos.
193            False if it isn't, or we don't have enough information.
194        """
195        try:
196            result = self.run('grep -q CHROMEOS /etc/lsb-release',
197                              ignore_status=True, timeout=10)
198        except (error.AutoservRunError, error.AutoservSSHTimeout):
199            return False
200        return result.exit_status == 0
201
202
203    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
204                         connect_timeout=None, alive_interval=None,
205                         alive_count_max=None, connection_attempts=None):
206        """Override default make_ssh_command to use tuned options.
207
208        Tuning changes:
209          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
210          connection failure. Consistency with remote_access.py.
211
212          - ServerAliveInterval=180; which causes SSH to ping connection every
213          180 seconds. In conjunction with ServerAliveCountMax ensures
214          that if the connection dies, Autotest will bail out quickly.
215
216          - ServerAliveCountMax=3; consistency with remote_access.py.
217
218          - ConnectAttempts=4; reduce flakiness in connection errors;
219          consistency with remote_access.py.
220
221          - UserKnownHostsFile=/dev/null; we don't care about the keys.
222
223          - SSH protocol forced to 2; needed for ServerAliveInterval.
224
225        @param user User name to use for the ssh connection.
226        @param port Port on the target host to use for ssh connection.
227        @param opts Additional options to the ssh command.
228        @param hosts_file Ignored.
229        @param connect_timeout Ignored.
230        @param alive_interval Ignored.
231        @param alive_count_max Ignored.
232        @param connection_attempts Ignored.
233
234        @returns: An ssh command with the requested settings.
235
236        """
237        options = ' '.join([opts, '-o Protocol=2'])
238        return super(ServoHost, self).make_ssh_command(
239            user=user, port=port, opts=options, hosts_file='/dev/null',
240            connect_timeout=30, alive_interval=180, alive_count_max=3,
241            connection_attempts=4)
242
243
244    def _make_scp_cmd(self, sources, dest):
245        """Format scp command.
246
247        Given a list of source paths and a destination path, produces the
248        appropriate scp command for encoding it. Remote paths must be
249        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
250        to allow additional ssh options.
251
252        @param sources: A list of source paths to copy from.
253        @param dest: Destination path to copy to.
254
255        @returns: An scp command that copies |sources| on local machine to
256                  |dest| on the remote servo host.
257
258        """
259        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
260                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
261        return command % (self.master_ssh_option,
262                          self.port, ' '.join(sources), dest)
263
264
265    def run(self, command, timeout=3600, ignore_status=False,
266            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
267            connect_timeout=30, ssh_failure_retry_ok=False,
268            options='', stdin=None, verbose=True, args=()):
269        """Run a command on the servo host.
270
271        Extends method `run` in SSHHost. If the servo host is a remote device,
272        it will call `run` in SSHost without changing anything.
273        If the servo host is 'localhost', it will call utils.system_output.
274
275        @param command: The command line string.
276        @param timeout: Time limit in seconds before attempting to
277                        kill the running process. The run() function
278                        will take a few seconds longer than 'timeout'
279                        to complete if it has to kill the process.
280        @param ignore_status: Do not raise an exception, no matter
281                              what the exit code of the command is.
282        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
283        @param connect_timeout: SSH connection timeout (in seconds)
284                                Ignored if host is 'localhost'.
285        @param options: String with additional ssh command options
286                        Ignored if host is 'localhost'.
287        @param ssh_failure_retry_ok: when True and ssh connection failure is
288                                     suspected, OK to retry command (but not
289                                     compulsory, and likely not needed here)
290        @param stdin: Stdin to pass (a string) to the executed command.
291        @param verbose: Log the commands.
292        @param args: Sequence of strings to pass as arguments to command by
293                     quoting them in " and escaping their contents if necessary.
294
295        @returns: A utils.CmdResult object.
296
297        @raises AutoservRunError if the command failed.
298        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
299                when servo host is not 'localhost'.
300
301        """
302        run_args = {'command': command, 'timeout': timeout,
303                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
304                    'stderr_tee': stderr_tee, 'stdin': stdin,
305                    'verbose': verbose, 'args': args}
306        if self.is_localhost():
307            if self._sudo_required:
308                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
309                        command)
310            try:
311                return utils.run(**run_args)
312            except error.CmdError as e:
313                logging.error(e)
314                raise error.AutoservRunError('command execution error',
315                                             e.result_obj)
316        else:
317            run_args['connect_timeout'] = connect_timeout
318            run_args['options'] = options
319            return super(ServoHost, self).run(**run_args)
320
321
322    def _get_release_version(self):
323        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
324
325        @returns The version string in lsb-release, under attribute
326                 CHROMEOS_RELEASE_VERSION.
327        """
328        lsb_release_content = self.run(
329                    'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
330        return lsbrelease_utils.get_chromeos_release_version(
331                    lsb_release_content=lsb_release_content)
332
333
334    def get_attached_duts(self, afe):
335        """Gather a list of duts that use this servo host.
336
337        @param afe: afe instance.
338
339        @returns list of duts.
340        """
341        return afe.get_hosts_by_attribute(
342                attribute=SERVO_HOST_ATTR, value=self.hostname)
343
344
345    def get_board(self):
346        """Determine the board for this servo host.
347
348        @returns a string representing this servo host's board.
349        """
350        return lsbrelease_utils.get_current_board(
351                lsb_release_content=self.run('cat /etc/lsb-release').stdout)
352
353
354    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
355        """Choose which dut to schedule servo host reboot job.
356
357        We'll want a semi-deterministic way of selecting which host should be
358        scheduled for the servo host reboot job.  For now we'll sort the
359        list with the expectation the dut list will stay consistent.
360        From there we'll grab the first dut that is available so we
361        don't schedule a job on a dut that will never run.
362
363        @param dut_list:  List of the dut hostnames to choose from.
364        @param afe:       Instance of the AFE.
365
366        @return hostname of dut to schedule job on.
367        """
368        afe_hosts = afe.get_hosts(dut_list)
369        afe_hosts.sort()
370        for afe_host in afe_hosts:
371            if afe_host.status not in host_states.UNAVAILABLE_STATES:
372                return afe_host.hostname
373        # If they're all unavailable, just return the first sorted dut.
374        dut_list.sort()
375        return dut_list[0]
376
377
378    def _sync_job_scheduled_for_duts(self, dut_list, afe):
379        """Checks if a synchronized reboot has been scheduled for these duts.
380
381        Grab all the host queue entries that aren't completed for the duts and
382        see if any of them have the expected job name.
383
384        @param dut_list:  List of duts to check on.
385        @param afe:       Instance of the AFE.
386
387        @returns True if the job is scheduled, False otherwise.
388        """
389        afe_hosts = afe.get_hosts(dut_list)
390        for afe_host in afe_hosts:
391            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
392            for hqe in hqes:
393                job = afe.get_jobs(id=hqe.job.id)
394                if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
395                                           _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
396                    return True
397        return False
398
399
400    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
401        """Schedule a job to reboot the servo host.
402
403        When we schedule a job, it will create a ServoHost object which will
404        go through this entire flow of checking if a reboot is needed and
405        trying to schedule it.  There is probably a better approach to setting
406        up a synchronized reboot but I'm coming up short on better ideas so I
407        apologize for this circus show.
408
409        @param dut_list:      List of duts that need to be locked.
410        @param afe:           Instance of afe.
411        @param force_reboot:  Boolean to indicate if a forced reboot should be
412                              scheduled or not.
413        """
414        # If we've already scheduled job on a dut, we're done here.
415        if self._sync_job_scheduled_for_duts(dut_list, afe):
416            return
417
418        # Looks like we haven't scheduled a job yet.
419        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
420                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
421        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
422        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
423        control_file = getter.get_control_file_contents_by_name(test)
424        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
425        try:
426            afe.create_job(control_file=control_file, name=test,
427                           control_type=control_type, hosts=[dut])
428        except Exception as e:
429            # Sometimes creating the job will raise an exception. We'll log it
430            # but we don't want to fail because of it.
431            logging.exception('Scheduling reboot job failed due to Exception.')
432
433
434    def reboot(self, *args, **dargs):
435        """Reboot using special servo host reboot command."""
436        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
437                                      *args, **dargs)
438
439
440    def _check_for_reboot(self, updater):
441        """Reboot this servo host if an upgrade is waiting.
442
443        If the host has successfully downloaded and finalized a new
444        build, reboot.
445
446        @param updater: a ChromiumOSUpdater instance for checking
447            whether reboot is needed.
448        @return Return a (status, build) tuple reflecting the
449            update_engine status and current build of the host
450            at the end of the call.
451        """
452        current_build_number = self._get_release_version()
453        status = updater.check_update_status()
454        if status == autoupdater.UPDATER_NEED_REBOOT:
455            # Check if we need to schedule an organized reboot.
456            afe = frontend_wrappers.RetryingAFE(
457                    timeout_min=5, delay_sec=10,
458                    server=server_site_utils.get_global_afe_hostname())
459            dut_list = self.get_attached_duts(afe)
460            logging.info('servo host has the following duts: %s', dut_list)
461            if len(dut_list) > 1:
462                logging.info('servo host has multiple duts, scheduling '
463                             'synchronized reboot')
464                self.schedule_synchronized_reboot(dut_list, afe)
465                return status, current_build_number
466
467            logging.info('Rebooting servo host %s from build %s',
468                         self.hostname, current_build_number)
469            # Tell the reboot() call not to wait for completion.
470            # Otherwise, the call will log reboot failure if servo does
471            # not come back.  The logged reboot failure will lead to
472            # test job failure.  If the test does not require servo, we
473            # don't want servo failure to fail the test with error:
474            # `Host did not return from reboot` in status.log.
475            self.reboot(fastsync=True, wait=False)
476
477            # We told the reboot() call not to wait, but we need to wait
478            # for the reboot before we continue.  Alas.  The code from
479            # here below is basically a copy of Host.wait_for_restart(),
480            # with the logging bits ripped out, so that they can't cause
481            # the failure logging problem described above.
482            #
483            # The black stain that this has left on my soul can never be
484            # erased.
485            old_boot_id = self.get_boot_id()
486            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
487                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
488                                  old_boot_id=old_boot_id):
489                raise error.AutoservHostError(
490                        'servo host %s failed to shut down.' %
491                        self.hostname)
492            if self.wait_up(timeout=120):
493                current_build_number = self._get_release_version()
494                status = updater.check_update_status()
495                logging.info('servo host %s back from reboot, with build %s',
496                             self.hostname, current_build_number)
497            else:
498                raise error.AutoservHostError(
499                        'servo host %s failed to come back from reboot.' %
500                        self.hostname)
501        return status, current_build_number
502
503
504    def update_image(self, wait_for_update=False):
505        """Update the image on the servo host, if needed.
506
507        This method recognizes the following cases:
508          * If the Host is not running Chrome OS, do nothing.
509          * If a previously triggered update is now complete, reboot
510            to the new version.
511          * If the host is processing a previously triggered update,
512            do nothing.
513          * If the host is running a version of Chrome OS different
514            from the default for servo Hosts, trigger an update, but
515            don't wait for it to complete.
516
517        @param wait_for_update If an update needs to be applied and
518            this is true, then don't return until the update is
519            downloaded and finalized, and the host rebooted.
520        @raises dev_server.DevServerException: If all the devservers are down.
521        @raises site_utils.ParseBuildNameException: If the devserver returns
522            an invalid build name.
523        @raises autoupdater.ChromiumOSError: If something goes wrong in the
524            checking update engine client status or applying an update.
525        @raises AutoservRunError: If the update_engine_client isn't present on
526            the host, and the host is a cros_host.
527
528        """
529        # servod could be running in a Ubuntu workstation.
530        if not self.is_cros_host():
531            logging.info('Not attempting an update, either %s is not running '
532                         'chromeos or we cannot find enough information about '
533                         'the host.', self.hostname)
534            return
535
536        if lsbrelease_utils.is_moblab():
537            logging.info('Not attempting an update, %s is running moblab.',
538                         self.hostname)
539            return
540
541        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
542        target_build_number = server_site_utils.ParseBuildName(
543                target_build)[3]
544        # For servo image staging, we want it as more widely distributed as
545        # possible, so that devservers' load can be evenly distributed. So use
546        # hostname instead of target_build as hash.
547        ds = dev_server.ImageServer.resolve(self.hostname,
548                                            hostname=self.hostname)
549        url = ds.get_update_url(target_build)
550
551        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
552        status, current_build_number = self._check_for_reboot(updater)
553        update_pending = True
554        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
555            logging.info('servo host %s already processing an update, update '
556                         'engine client status=%s', self.hostname, status)
557        elif status == autoupdater.UPDATER_NEED_REBOOT:
558            return
559        elif current_build_number != target_build_number:
560            logging.info('Using devserver url: %s to trigger update on '
561                         'servo host %s, from %s to %s', url, self.hostname,
562                         current_build_number, target_build_number)
563            try:
564                ds.stage_artifacts(target_build,
565                                   artifacts=['full_payload'])
566            except Exception as e:
567                logging.error('Staging artifacts failed: %s', str(e))
568                logging.error('Abandoning update for this cycle.')
569            else:
570                try:
571                    # TODO(jrbarnette): This 'touch' is a gross hack
572                    # to get us past crbug.com/613603.  Once that
573                    # bug is resolved, we should remove this code.
574                    self.run('touch /home/chronos/.oobe_completed')
575                    updater.trigger_update()
576                except autoupdater.RootFSUpdateError as e:
577                    trigger_download_status = 'failed with %s' % str(e)
578                    metrics.Counter('chromeos/autotest/servo/'
579                                    'rootfs_update_failed').increment()
580                else:
581                    trigger_download_status = 'passed'
582                logging.info('Triggered download and update %s for %s, '
583                             'update engine currently in status %s',
584                             trigger_download_status, self.hostname,
585                             updater.check_update_status())
586        else:
587            logging.info('servo host %s does not require an update.',
588                         self.hostname)
589            update_pending = False
590
591        if update_pending and wait_for_update:
592            logging.info('Waiting for servo update to complete.')
593            self.run('update_engine_client --follow', ignore_status=True)
594
595
596    def verify(self, silent=False):
597        """Update the servo host and verify it's in a good state.
598
599        @param silent   If true, suppress logging in `status.log`.
600        """
601        # TODO(jrbarnette) Old versions of beaglebone_servo include
602        # the powerd package.  If you touch the .oobe_completed file
603        # (as we do to work around an update_engine problem), then
604        # powerd will eventually shut down the beaglebone for lack
605        # of (apparent) activity.  Current versions of
606        # beaglebone_servo don't have powerd, but until we can purge
607        # the lab of the old images, we need to make sure powerd
608        # isn't running.
609        self.run('stop powerd', ignore_status=True)
610        try:
611            self._repair_strategy.verify(self, silent)
612        except:
613            self.disconnect_servo()
614            raise
615
616
617    def repair(self, silent=False):
618        """Attempt to repair servo host.
619
620        @param silent   If true, suppress logging in `status.log`.
621        """
622        try:
623            self._repair_strategy.repair(self, silent)
624        except:
625            self.disconnect_servo()
626            raise
627
628
629    def has_power(self):
630        """Return whether or not the servo host is powered by PoE."""
631        # TODO(fdeng): See crbug.com/302791
632        # For now, assume all servo hosts in the lab have power.
633        return self.is_in_lab()
634
635
636    def power_cycle(self):
637        """Cycle power to this host via PoE if it is a lab device.
638
639        @raises AutoservRepairError if it fails to power cycle the
640                servo host.
641
642        """
643        if self.has_power():
644            try:
645                rpm_client.set_power(self.hostname, 'CYCLE')
646            except (socket.error, xmlrpclib.Error,
647                    httplib.BadStatusLine,
648                    rpm_client.RemotePowerException) as e:
649                raise hosts.AutoservRepairError(
650                        'Power cycling %s failed: %s' % (self.hostname, e))
651        else:
652            logging.info('Skipping power cycling, not a lab device.')
653
654
655    def get_servo(self):
656        """Get the cached servo.Servo object.
657
658        @return: a servo.Servo object.
659        """
660        return self._servo
661
662
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated
    component of the name; any remaining components are kept as is.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
674
675
def servo_host_is_up(servo_hostname):
    """Ping a servo host to find out whether it is up.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one of the pings was answered, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname, count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    ping_result = ping_runner.PingRunner().ping(config)
    return ping_result.received > 0
693
694
695def _map_afe_board_to_servo_board(afe_board):
696    """Map a board we get from the AFE to a servo appropriate value.
697
698    Many boards are identical to other boards for servo's purposes.
699    This function makes that mapping.
700
701    @param afe_board string board name received from AFE.
702    @return board we expect servo to have.
703
704    """
705    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
706    BOARD_MAP = {'gizmo': 'panther'}
707    mapped_board = afe_board
708    if afe_board in BOARD_MAP:
709        mapped_board = BOARD_MAP[afe_board]
710    else:
711        for suffix in KNOWN_SUFFIXES:
712            if afe_board.endswith(suffix):
713                mapped_board = afe_board[0:-len(suffix)]
714                break
715    if mapped_board != afe_board:
716        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
717    return mapped_board
718
719
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    When the attributes name a servo host, that host (plus the optional
    port and serial attributes) is used.  If the port attribute cannot
    be converted to an int, the error is logged and the returned
    `servo_args` is `None`.  When no servo host attribute is present,
    and we are not on moblab and the DUT hostname is not an IP address,
    the legacy `<dut>-servo` hostname is used, but only if that name is
    in the lab zone.  Whenever `servo_args` is not `None` and the DUT's
    host info has a board, the mapped servo board is added.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            `None` when no usable servo data was found.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        # Inside a container the moblab flag is read from the SSP
        # section of the config rather than probed from the system.
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # A loopback servo address is replaced with the configured
            # `host_container_ip` when running in an SSP moblab container.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except (TypeError, ValueError):
                # int() raises TypeError for a non-string/non-number
                # value such as None, and ValueError for a malformed
                # string; both mean the port is unusable.
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args is None here if the port was invalid; storing the
        # serial into it would raise TypeError, so skip it in that case.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
778
779
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    Builds a `ServoHost` for the servo attached to `dut` and checks it
    with either `verify()` or `repair()`.  What happens depends on the
    parameters:
      * If `servo_args` is not `None`, the caller depends on servo:  a
        servo host is always created and checked with
        `repair(silent=True)`.  Exceptions from that repair propagate
        to the caller.
      * Otherwise, if `try_lab_servo` is true and servo parameters can
        be found for `dut`:
          * If `try_servo_repair` is true, a servo host is created and
            checked with `repair()`.
          * If not, a servo host is created and checked with
            `verify()`, but only if the servo host answers `ping`.
        In this mode exceptions from the check are logged and
        discarded, and the servo host object is still returned.

    The case of a non-`None` `servo_args` arises only when we're called
    from a test (not special task) control file that has an explicit
    dependency on servo.  In that case repair must not write to
    `status.log`, so as to avoid polluting test results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Servo host parameters (host name, port number, DUT board) are taken
    from the first of these sources that yields a result:
      * Servo attributes found on the `dut` parameter.
      * A servo DNS name derived from the DUT hostname, if it exists in
        the CrOS lab network; it is used with the default port and the
        DUT's board.
      * The `servo_args` dict supplied by the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, exceptions from the
                          servo check will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_required = servo_args is not None
    in_lab = False
    if dut is not None and (servo_required or try_lab_servo):
        # Parameters discovered from the DUT win over the caller's.
        dut_servo_args, in_lab = _get_standard_servo_args(dut)
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None

    # repair() does everything verify() does, so exactly one of the
    # two is called below; never both.
    if servo_required:
        required_host = ServoHost(is_in_lab=in_lab, **servo_args)
        required_host.repair(silent=True)
        return required_host

    # With no repair requested, don't bother with a servo host that
    # doesn't even answer ping.
    if not (try_servo_repair or
            servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    optional_host = ServoHost(is_in_lab=in_lab, **servo_args)
    if try_servo_repair:
        operation = 'repair'
    else:
        operation = 'verification'
    try:
        if try_servo_repair:
            optional_host.repair()
        else:
            optional_host.verify()
    except Exception:
        logging.exception('Servo %s failed for %s',
                          operation, optional_host.hostname)
    return optional_host
863