• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import xmlrpclib
16
17from autotest_lib.client.bin import utils
18from autotest_lib.client.common_lib import control_data
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
21from autotest_lib.client.common_lib import host_states
22from autotest_lib.client.common_lib import hosts
23from autotest_lib.client.common_lib import lsbrelease_utils
24from autotest_lib.client.common_lib.cros import autoupdater
25from autotest_lib.client.common_lib.cros import dev_server
26from autotest_lib.client.common_lib.cros import retry
27from autotest_lib.client.common_lib.cros.network import ping_runner
28from autotest_lib.client.cros import constants as client_constants
29from autotest_lib.server import afe_utils
30from autotest_lib.server import site_utils as server_site_utils
31from autotest_lib.server.cros import dnsname_mangler
32from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
33from autotest_lib.server.cros.dynamic_suite import control_file_getter
34from autotest_lib.server.cros.servo import servo
35from autotest_lib.server.hosts import servo_repair
36from autotest_lib.server.hosts import ssh_host
37from autotest_lib.site_utils.rpm_control_system import rpm_client
38
39try:
40    from chromite.lib import metrics
41except ImportError:
42    metrics = utils.metrics_mock
43
44
# Keys of the host attributes stored in the database that describe the
# servo connected to a DUT: the servo host's name, the servod port, the
# board, and the servo serial.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

# Shorthand for the process-wide configuration object.
_CONFIG = global_config.global_config

# When set, servod is reached through an ssh tunnel rather than a direct
# XML-RPC connection (see ServoHost.get_servod_server_proxy()).  Off
# unless the CROS section of the config turns it on.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS',
        'enable_ssh_tunnel_for_servo',
        type=bool,
        default=False)

# Root directory searched for the control files of the servo host
# reboot tests.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER',
        'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the tests scheduled to reboot a servo host shared by
# several DUTs: the regular variant and the forced variant.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
62
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Port used for servod when the caller does not supply one.
    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Name of the XML-RPC method used as the readiness probe when the
    # servod connection is made through an ssh tunnel.
    SERVO_READY_METHOD = 'get_version'

    # Request a normal reboot in the background after one second, then
    # issue a forced reboot ten seconds later.
    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_serial=None, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param servo_board: Board that the servo is connected to.
        @param servo_serial: Value later passed as `servo_serial` to
                             `servo.Servo` by `connect_servo()`.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.
                          Ignored when `servo_host` is 'localhost', which
                          is never treated as a lab device.
        @param args: Extra positional arguments for the parent
                     `_initialize()`.
        @param dargs: Extra keyword arguments for the parent
                      `_initialize()`.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        self.servo_port = servo_port
        self.servo_board = servo_board
        self.servo_serial = servo_serial
        # No servo connection exists until connect_servo() succeeds.
        self._servo = None
        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False


    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises AutoservVerifyError if initializing the servo does not
                finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')
        # Only publish the servo object once it initialized successfully.
        self._servo = servo_obj


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, and set `self._servo`
        back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            self._servo = None


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        When ssh tunneling is enabled in the configuration and the
        servo host is not localhost, the connection is made through
        `rpc_server_tracker`; otherwise a direct XML-RPC proxy to
        `http://<hostname>:<servo_port>` is returned.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            # Could not run the probe at all: not enough information.
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', the command is executed locally
        with utils.run, wrapped in `sudo -n sh -c` when we are not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here).
                                     Accepted for interface compatibility;
                                     this method does not forward it.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                        command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type
                # that callers of a remote run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    def _get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                    'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                    lsb_release_content=lsb_release_content)


    def get_attached_duts(self, afe):
        """Gather a list of duts that use this servo host.

        The lookup is by the `servo_host` host attribute matching this
        host's hostname.

        @param afe: afe instance.

        @returns list of duts.
        """
        return afe.get_hosts_by_attribute(
                attribute=SERVO_HOST_ATTR, value=self.hostname)


    def get_board(self):
        """Determine the board for this servo host.

        @returns a string representing this servo host's board.
        """
        return lsbrelease_utils.get_current_board(
                lsb_release_content=self.run('cat /etc/lsb-release').stdout)


    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
        """Choose which dut to schedule servo host reboot job.

        We'll want a semi-deterministic way of selecting which host should be
        scheduled for the servo host reboot job.  Hosts are considered in
        hostname order, with the expectation the dut list will stay
        consistent.  From there we'll grab the first dut that is available
        so we don't schedule a job on a dut that will never run.

        The caller's `dut_list` is left unmodified.

        @param dut_list:  List of the dut hostnames to choose from.
        @param afe:       Instance of the AFE.

        @return hostname of dut to schedule job on.

        @raises IndexError if no AFE host is available and `dut_list`
                is empty.
        """
        # Order by hostname explicitly: ordering the AFE host objects
        # themselves would not give a stable, meaningful order.
        afe_hosts = sorted(afe.get_hosts(dut_list),
                           key=lambda afe_host: afe_host.hostname)
        for afe_host in afe_hosts:
            if afe_host.status not in host_states.UNAVAILABLE_STATES:
                return afe_host.hostname
        # If they're all unavailable, just return the first sorted dut.
        # sorted() is used so the caller's list is not reordered.
        return sorted(dut_list)[0]


    def _sync_job_scheduled_for_duts(self, dut_list, afe):
        """Checks if a synchronized reboot has been scheduled for these duts.

        Grab all the host queue entries that aren't completed for the duts and
        see if any of them have the expected job name.

        @param dut_list:  List of duts to check on.
        @param afe:       Instance of the AFE.

        @returns True if the job is scheduled, False otherwise.
        """
        reboot_job_names = (_SERVO_HOST_REBOOT_TEST_NAME,
                            _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        afe_hosts = afe.get_hosts(dut_list)
        for afe_host in afe_hosts:
            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
            for hqe in hqes:
                job = afe.get_jobs(id=hqe.job.id)
                if job and job[0].name in reboot_job_names:
                    return True
        return False


    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        When we schedule a job, it will create a ServoHost object which will
        go through this entire flow of checking if a reboot is needed and
        trying to schedule it.  There is probably a better approach to setting
        up a synchronized reboot but I'm coming up short on better ideas so I
        apologize for this circus show.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # If we've already scheduled job on a dut, we're done here.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # Looks like we haven't scheduled a job yet.
        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
        control_file = getter.get_control_file_contents_by_name(test)
        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
        try:
            afe.create_job(control_file=control_file, name=test,
                           control_type=control_type, hosts=[dut])
        except Exception:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            # logging.exception() records the active traceback itself.
            logging.exception('Scheduling reboot job failed due to Exception.')


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command.

        @param args: Positional arguments for the parent `reboot()`.
        @param dargs: Keyword arguments for the parent `reboot()`.
        """
        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                      *args, **dargs)


    def _check_for_reboot(self, updater):
        """Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.  When more than one DUT is attached to this
        servo host, no reboot happens here; a synchronized reboot job
        is scheduled instead.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to shut down or
            fails to come back after the reboot.
        """
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            # Check if we need to schedule an organized reboot.
            afe = frontend_wrappers.RetryingAFE(
                    timeout_min=5, delay_sec=10,
                    server=server_site_utils.get_global_afe_hostname())
            dut_list = self.get_attached_duts(afe)
            logging.info('servo host has the following duts: %s', dut_list)
            if len(dut_list) > 1:
                logging.info('servo host has multiple duts, scheduling '
                             'synchronized reboot')
                self.schedule_synchronized_reboot(dut_list, afe)
                return status, current_build_number

            logging.info('Rebooting servo host %s from build %s',
                         self.hostname, current_build_number)
            # Record the boot id *before* asking for the reboot.  Once
            # the reboot has been requested the host may already be on
            # its way down, so querying afterwards can fail or report
            # the new boot id, which defeats the wait_down() check.
            old_boot_id = self.get_boot_id()

            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            self.reboot(fastsync=True, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                # Refresh both values so the caller sees the
                # post-reboot state.
                current_build_number = self._get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
        # Element 3 of the parsed build name is compared against the
        # host's release version below.
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        # For servo image staging, we want it as more widely distributed as
        # possible, so that devservers' load can be evenly distributed. So use
        # hostname instead of target_build as hash.
        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        # Assume an update is in flight unless we find that none is needed.
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif status == autoupdater.UPDATER_NEED_REBOOT:
            # Still waiting for a reboot after _check_for_reboot(): a
            # synchronized reboot was scheduled, so nothing more to do.
            return
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging is best effort; try again on a later cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette): This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        If verification fails, any servo connection is dropped before
        the failure is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  If you touch the .oobe_completed file
        # (as we do to work around an update_engine problem), then
        # powerd will eventually shut down the beaglebone for lack
        # of (apparent) activity.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Catch-all is for cleanup only; the error is re-raised.
            self.disconnect_servo()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        If the repair fails, any servo connection is dropped before
        the failure is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        try:
            self._repair_strategy.repair(self, silent)
        except:
            # Catch-all is for cleanup only; the error is re-raised.
            self.disconnect_servo()
            raise


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None when no servo is
                 currently connected.
        """
        return self._servo
660
def make_servo_hostname(dut_hostname):
    """Derive the servo host's name from the name of its DUT.

    The suffix '-servo' is appended to the first dot-separated label of
    the DUT hostname; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields an empty separator and tail when the name has
    # no dot, so the unqualified case needs no special handling.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
672
673
def servo_host_is_up(servo_hostname):
    """Tell whether a servo host responds to ping.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple of
    # seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    outcome = ping_runner.PingRunner().ping(config)
    # A single reply out of the three probes is enough.
    return outcome.received > 0
691
692
693def _map_afe_board_to_servo_board(afe_board):
694    """Map a board we get from the AFE to a servo appropriate value.
695
696    Many boards are identical to other boards for servo's purposes.
697    This function makes that mapping.
698
699    @param afe_board string board name received from AFE.
700    @return board we expect servo to have.
701
702    """
703    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
704    BOARD_MAP = {'gizmo': 'panther'}
705    mapped_board = afe_board
706    if afe_board in BOARD_MAP:
707        mapped_board = BOARD_MAP[afe_board]
708    else:
709        for suffix in KNOWN_SUFFIXES:
710            if afe_board.endswith(suffix):
711                mapped_board = afe_board[0:-len(suffix)]
712                break
713    if mapped_board != afe_board:
714        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
715    return mapped_board
716
717
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the servo port attribute is present but is not a valid integer,
    the error is logged and no servo args are returned (`None`), since
    a `ServoHost` cannot be created with the proper port.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict (or `None`) with host and an
            optional port, serial and board, plus an `is_in_lab` flag
            indicating whether this is in the CrOS test lab, or some
            different environment.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        # Inside an SSP container on moblab, a loopback servo host is
        # replaced by the configured 'host_container_ip' value.
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except ValueError:
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been reset to None by a bad port;
        # indexing into it unguarded would raise TypeError.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
776
777
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    Builds a `ServoHost` for the servo attached to `dut` and checks it
    with either `verify()` or `repair()`.  What happens depends on the
    parameters:
      * A non-`None` `servo_args` means the caller depends on servo:
        a servo host must be created, and it is checked with
        `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * With `try_servo_repair` true, a servo host is created and
            checked with `repair()`.
          * Without it, a servo host is created and checked with
            `verify()` only if the servo answers `ping`.

    When `servo_args` was not `None`, repair failure exceptions
    propagate to the caller; in every other case exceptions are logged
    and then discarded.  Note that the former only happens when we're
    called from a test (not special task) control file that has an
    explicit dependency on servo.  In that case, we require that repair
    not write to `status.log`, so as to avoid polluting test results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `repair()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    # Remember whether the caller supplied args before any override.
    caller_requires_servo = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or caller_requires_servo):
        dut_servo_args, is_in_lab = _get_standard_servo_args(dut)
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None

    # Only the opportunistic verify-only path is gated on a ping.
    if not (caller_requires_servo or try_servo_repair):
        if not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
            return None

    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if caller_requires_servo:
        # Failures propagate to the caller on this path.
        newhost.repair(silent=True)
        return newhost

    operation = 'repair' if try_servo_repair else 'verification'
    try:
        if try_servo_repair:
            newhost.repair()
        else:
            newhost.verify()
    except Exception:
        # Best effort: log the failure and still return the host.
        logging.exception('Servo %s failed for %s',
                          operation, newhost.hostname)
    return newhost
861