1# Lint as: python2, python3
2# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# Expects to be run in an environment with sudo and no interactive password
7# prompt, such as within the Chromium OS development chroot.
8
9
10"""This file provides core logic for servo verify/repair process."""
11
12
13from __future__ import absolute_import
14from __future__ import division
15from __future__ import print_function
16
17import logging
18import os
19import re
20import tarfile
21import threading
22import json
23import tempfile
24import time
25import six
26import six.moves.xmlrpc_client
27
28try:
29    import docker
30    from autotest_lib.site_utils.docker import utils as docker_utils
31except ImportError:
32    logging.info("Docker API is not installed in this environment")
33
34from autotest_lib.client.bin import utils
35from autotest_lib.client.common_lib import error
36from autotest_lib.client.common_lib import hosts
37from autotest_lib.client.common_lib import lsbrelease_utils
38from autotest_lib.client.common_lib.cros import retry
39from autotest_lib.server import crashcollect
40from autotest_lib.server.cros.servo import servo
41from autotest_lib.server.hosts import servo_repair
42from autotest_lib.server.hosts import base_servohost
43from autotest_lib.server.hosts import servo_constants
44from autotest_lib.server.cros.faft.utils import config
45from autotest_lib.client.common_lib import global_config
46from autotest_lib.server.cros.servo.topology import servo_topology
47
48try:
49    from autotest_lib.utils.frozen_chromite.lib import metrics
50except ImportError:
51    metrics = utils.metrics_mock
52
53_CONFIG = global_config.global_config
54
55SERVOD_CONTAINER_IMAGE_PATH = "us-docker.pkg.dev/chromeos-partner-moblab/common-core"
56
# Create this flag on the drone or inside the SSP container to enable servod
# debug mode. In debug mode the servod container will not be deleted after
# being stopped.
59SERVOD_DEBUG_FLAG = '/servod_debug'
60
61DOCKER_SERVOD_DEBUG_MODE = os.environ.get('DOCKER_SERVOD_DEBUG_MODE', '0')
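# Illustrative note (an assumption about the deployment, not verified here):
# debug mode can be requested either by exporting DOCKER_SERVOD_DEBUG_MODE=1
# in the environment before this module loads, or by creating the flag file,
# e.g. `touch /servod_debug`. Either condition makes stop_servod() keep the
# stopped servod container around for inspection (see stop_servod() below).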
62
63
64class ServoHost(base_servohost.BaseServoHost):
65    """Host class for a servo host(e.g. beaglebone, labstation)
66     that with a servo instance for a specific port.
67
68     @type _servo: servo.Servo | None
69     """
70
71    DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
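    # Illustrative example (an assumption about the calling environment): a
    # drone or chroot can override the default by exporting SERVOD_PORT,
    # e.g. `export SERVOD_PORT=9998`, before this module is imported;
    # otherwise port 9999 is used.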
72
73    # Timeout for initializing servo signals.
74    INITIALIZE_SERVO_TIMEOUT_SECS = 60
75
76    # Default timeout for run terminal command.
77    DEFAULT_TERMINAL_TIMEOUT = 30
78
79    # Ready test function
80    SERVO_READY_METHOD = 'get_version'
81
82    # Directory prefix on the servo host where the servod logs are stored.
83    SERVOD_LOG_PREFIX = '/var/log/servod'
84
85    # Exit code to use when symlinks for servod logs are not found.
86    NO_SYMLINKS_CODE = 9
87
88    # Directory in the job's results directory to dump the logs into.
89    LOG_DIR = 'servod'
90
91    # Prefix for joint loglevel files in the logs.
92    JOINT_LOG_PREFIX = 'log'
93
94    # Regex group to extract timestamp from logfile name.
95    TS_GROUP = 'ts'
96
    # This regex is used to extract the timestamp from servod log filenames.
    # The filenames always start with 'log.'
99    TS_RE = (r'log.'
100             # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
101             r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
102             # The loglevel is optional depending on labstation version.
103             r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
104    TS_EXTRACTOR = re.compile(TS_RE)
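    # Illustrative example (a sketch, not taken from real lab logs): a file
    # named 'log.2020-01-23--13-15-12.223.DEBUG' matches TS_EXTRACTOR, and
    # TS_EXTRACTOR.match(name).group(TS_GROUP) returns
    # '2020-01-23--13-15-12.223'.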
105
106    # Regex group to extract MCU name from logline in servod logs.
107    MCU_GROUP = 'mcu'
108
109    # Regex group to extract logline from MCU logline in servod logs.
110    LINE_GROUP = 'line'
111
112    # This regex is used to extract the mcu and the line content from an
113    # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
114    # Here is an example log-line:
115    #
116    # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
117    # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
118    #
119    # Here is conceptually how they are formatted:
120    #
121    #  <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
122    #  <output>
123    #
124    # The log format starts with a timestamp
125    MCU_RE = (r'[\d\-]+ [\d:,]+ '
126              # The mcu that is logging this is next.
127              r'- (?P<%s>[\w/]+) - '
128              # Next, we have more log outputs before the actual line.
129              # Information about the file line, logging function etc.
130              # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
131              # NOTE: if the log format changes, this regex needs to be
132              # adjusted.
133              r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
134              # Lastly, we get the MCU's console line.
135              r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
136    MCU_EXTRACTOR = re.compile(MCU_RE)
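    # Illustrative example (a sketch based on the sample log-line above,
    # joined into a single line): for that line, MCU_EXTRACTOR.match(line)
    # yields group MCU_GROUP == 'servo_v4' and
    # group LINE_GROUP == 'cc polarity: cc1'.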
137
    # Regex to detect timeout messages when the USB-C pigtail has a timeout
    # issue. e.g.:  [475635.427072 PD TMOUT RX 1/1]
140    USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'
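    # Illustrative example (a sketch): re.search(USBC_PIGTAIL_TIMEOUT_RE,
    # '[475635.427072 PD TMOUT RX 1/1]') returns a match for the timeout
    # message shown in the comment above.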
141
142    # Suffix to identify compressed logfiles.
143    COMPRESSION_SUFFIX = '.tbz2'
144
    # A suffix to mark servod log directories that came from instances that
    # ran during this servo_host session but are not the last one running,
    # e.g. when an instance restarted (on purpose, or due to a bug) in the
    # middle of the run.
149    OLD_LOG_SUFFIX = 'old'
150
151    def _init_attributes(self):
152        self._servo_state = None
153        self.servo_port = None
154        self.servo_board = None
155        self._ec_supported = None
156        self.servo_model = None
157        self.servo_serial = None
158        self.servo_setup = None
159        self.servo_recovery = None
160        self.servo_fw_channel = None
161        self.additional_servod_args = None
162        self._dut_health_profile = None
        # The flag that indicates if a servo is connected to a smart usbhub.
164        # TODO(xianuowang@) remove this flag once all usbhubs in the lab
165        # get replaced.
166        self.smart_usbhub = None
167        self._servo = None
168        self._topology = None
169        self._tunnel_proxy = None
170        self._tunnel_proxy_lock = threading.Lock()
171        self._initial_instance_ts = None
172        # Flag to make sure that multiple calls to close do not result in the
173        # logic executing multiple times.
174        self._closed = False
175        # Per-thread local data
176        self._local = threading.local()
177
178    def _initialize(self,
179                    servo_host='localhost',
180                    servo_port=DEFAULT_PORT,
181                    servo_board=None,
182                    servo_model=None,
183                    servo_serial=None,
184                    servo_setup=None,
185                    servo_recovery=None,
186                    servo_fw_channel=None,
187                    additional_servod_args=None,
188                    is_in_lab=None,
189                    *args,
190                    **dargs):
191        """Initialize a ServoHost instance.
192
193        A ServoHost instance represents a host that controls a servo.
194
195        @param servo_host: Name of the host where the servod process
196                           is running.
197        @param servo_port: Port the servod process is listening on. Defaults
198                           to the SERVOD_PORT environment variable if set,
199                           otherwise 9999.
200        @param servo_board: Board that the servo is connected to.
201        @param servo_model: Model that the servo is connected to.
202        @param servo_serial: Serial number of the servo device.
203        @param servo_setup: Type of servo setup, e.g. REGULAR or DUAL_V4.
        @param additional_servod_args: Additional args that will be appended
                                       to the servod start command.
206        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
207                          to None, for which utils.host_is_in_lab_zone will be
208                          called to check if the servo host is in Cros lab.
209
210        """
211        super(ServoHost, self)._initialize(hostname=servo_host,
212                                           is_in_lab=is_in_lab, *args, **dargs)
213        self._init_attributes()
214        self.servo_port = int(servo_port)
215        self.servo_board = servo_board
216        self.servo_model = servo_model
217        self.servo_serial = servo_serial
218        self.servo_setup = servo_setup
219        self.servo_recovery = servo_recovery
220        self.servo_fw_channel = servo_fw_channel
221        self.additional_servod_args = additional_servod_args
222
223        # The location of the log files on the servo host for this instance.
224        self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
225                                         self.servo_port)
226        # Path of the servo host lock file.
227        self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
228                           self.LOCK_FILE_POSTFIX)
229        # File path to declare a reboot request.
230        self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
231                             self.REBOOT_FILE_POSTFIX)
232
        # Lock the servo host if it's an in-lab labstation to prevent other
        # tasks from rebooting it until the current task completes. We also
        # wait here to make sure the labstation is up, in case it is in the
        # middle of a reboot.
237        self._is_locked = False
238        if (not self.is_containerized_servod()
239                    and self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
240                    and self.is_labstation()):
241            self._lock()
242            try:
243                self.wait_ready()
244            except Exception as e:
                logging.info(
                        'Unexpected error while ensuring labstation'
                        ' readiness; %s', str(e))
248
249        self._repair_strategy = (
250                servo_repair.create_servo_repair_strategy())
251
252    def __str__(self):
253        return "<%s '%s:%s'>" % (
254                type(self).__name__, self.hostname, self.servo_port)
255
256    def connect_servo(self):
257        """ Initialize and setup servo for later use.
258        """
259        self.initialize_servo()
260        self.initialize_dut_for_servo()
261
262    def initialize_servo(self):
263        """Establish a connection to the servod server on this host.
264
265        Initializes `self._servo` and then verifies that all network
266        connections are working.  This will create an ssh tunnel if
267        it's required.
268        """
269        self._servo = servo.Servo(servo_host=self,
270                                  servo_serial=self.servo_serial)
271
272    def initialize_dut_for_servo(self):
273        """This method will do some setup for dut control, e.g. setup
274        main servo_v4 device, and also testing the connection between servo
275        and DUT. As a side effect of testing the connection, all signals on
276        the target servo are reset to default values, and the USB stick is
277        set to the neutral (off) position.
278        """
        if not self._servo:
            raise hosts.AutoservVerifyError('Servo object needs to be'
                                            ' initialized before initializing'
                                            ' the DUT.')
283        timeout, _ = retry.timeout(
284            self._servo.initialize_dut,
285            timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
286        if timeout:
287            raise hosts.AutoservVerifyError('Initialize dut for servo timed'
288                                            ' out.')
289
290    def disconnect_servo(self):
291        """Disconnect our servo if it exists.
292
293        If we've previously successfully connected to our servo,
294        disconnect any established ssh tunnel, and set `self._servo`
295        back to `None`.
296        """
297        if self._servo:
298            # N.B. This call is safe even without a tunnel:
299            # rpc_server_tracker.disconnect() silently ignores
300            # unknown ports.
301            self.rpc_server_tracker.disconnect(self.servo_port)
302            self._servo = None
303
304    def _to_str(self, maybe_str):
305        """If the param is bytes, convert it to a str."""
306        # b/217780680, Make this compatible with python3,
307        if isinstance(maybe_str, bytes):
308            return maybe_str.decode('utf-8', errors='replace')
309        return maybe_str
310
311    def _maybe_create_servod_ssh_tunnel_proxy(self):
312        """Create a xmlrpc proxy for use with a ssh tunnel.
313        A lock is used to safely create a singleton proxy.
314        """
315        with self._tunnel_proxy_lock:
316            if self._tunnel_proxy is None:
317                self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
318                       None,
319                       self.servo_port,
320                       ready_test_name=self.SERVO_READY_METHOD,
321                       timeout_seconds=60,
322                       request_timeout_seconds=3600,
323                       server_desc=str(self))
324
325    def get_servod_server_proxy(self):
326        """Return a proxy if it exists; otherwise, create a new one.
        A proxy can either be an ssh tunnel based proxy, or an httplib
328        based proxy.
329
330        @returns: An xmlrpclib.ServerProxy that is connected to the servod
331                  server on the host.
332        """
333        if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
334                    and not self.is_localhost()
335                    and not self.is_containerized_servod()):
336            # Check for existing ssh tunnel proxy.
337            if self._tunnel_proxy is None:
338                self._maybe_create_servod_ssh_tunnel_proxy()
339            return self._tunnel_proxy
340        else:
341            # xmlrpc/httplib is not thread-safe, so each thread must have its
342            # own separate proxy connection.
343            if not hasattr(self._local, "_per_thread_proxy"):
344                if self.is_containerized_servod():
345                    ip_addr = docker_utils.get_container_ip(self.hostname)
346                    remote = 'http://%s:%s' % (ip_addr, self.servo_port)
347                else:
348                    remote = 'http://%s:%s' % (self.hostname, self.servo_port)
349                logging.debug('Servo Proxy RPC URL: %s', remote)
350                self._local._per_thread_proxy = six.moves.xmlrpc_client.ServerProxy(remote)
351            return self._local._per_thread_proxy
352
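    # Usage sketch for get_servod_server_proxy() above (illustrative only;
    # variable names are hypothetical):
    #   proxy = servo_host.get_servod_server_proxy()
    #   version = proxy.get_version()  # SERVO_READY_METHOD, used as a ping
    # Whether this goes through an ssh tunnel or a plain http connection is
    # decided inside get_servod_server_proxy().
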
353    def verify(self, silent=False):
354        """Update the servo host and verify it's in a good state.
355
356        @param silent   If true, suppress logging in `status.log`.
357        """
358        message = 'Beginning verify for servo host %s port %s serial %s'
359        message %= (self.hostname, self.servo_port, self.servo_serial)
360        self.record('INFO', None, None, message)
361        try:
362            self._repair_strategy.verify(self, silent)
363            self._servo_state = servo_constants.SERVO_STATE_WORKING
364            self.record('INFO', None, None,
365                        'ServoHost verify set servo_state as WORKING')
366        except Exception as e:
367            if not self.is_localhost():
368                self._servo_state = self.determine_servo_state()
369                self.record('INFO', None, None,
370                            'ServoHost verify set servo_state as %s'
371                            % self._servo_state)
372            if self._is_critical_error(e):
373                raise
374
375    def _get_default_usbkey_mount_path(self):
376        return '/media/servo_usb/%s' % self.servo_port
377
378    def get_image_name_from_usbkey(self, usbkey_dev):
379        """Mount usb drive and check ChromeOS image name on it if there is
380        one. This method assumes the image_usbkey_direction is already set
381        to servo side.
382
383        @param usbkey_dev: usbkey dev path(e.g. /dev/sdb).
384
385        @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
386                  or empty string if no test image detected, or unexpected
387                  error occurred.
388        """
389        logging.info('Checking ChromeOS image name on usbkey.')
390        mount_dst = self._get_default_usbkey_mount_path()
391        # Unmount if there is an existing stale mount.
392        self._unmount_drive(mount_dst)
393        # ChromeOS root fs is in /dev/sdx3
394        mount_src = usbkey_dev + '3'
395        try:
396            if not self._mount_drive(mount_src, mount_dst):
                logging.debug('Unexpected error occurred while mounting the'
                              ' usb drive.')
398                return ''
399
400            release_content = self._to_str(
401                    self.run('cat %s/etc/lsb-release' % mount_dst,
402                             ignore_status=True).stdout.strip())
403
404            if not re.search(r'RELEASE_TRACK=.*test', release_content):
405                logging.info('The image on usbkey is not a test image')
406                return ''
407
408            return lsbrelease_utils.get_chromeos_release_builder_path(
409                lsb_release_content=release_content)
410        finally:
            logging.debug('Image check completed, unmounting the usb drive.')
412            self._unmount_drive(mount_dst)
413
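    # Usage sketch (illustrative; the device path is hypothetical):
    #   image_name = host.get_image_name_from_usbkey('/dev/sdb')
    # returns something like 'nami-release/R82.10138.0.0' when a test image
    # is present, or '' when no test image is detected or an unexpected error
    # occurred.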
414    def _extract_firmware_image_from_usbkey(self, fw_dst):
415        """Extract firmware images from the usbkey on servo, this method
416        assumes there is already a ChromeOS test image staged on servo.
417
418        @param fw_dst: the path that we'll copy firmware images to.
419
420        @returns: a json format string of firmware manifest data.
421        """
422        usbkey_dev = self._probe_and_validate_usb_dev()
423        if not usbkey_dev:
            raise hosts.AutoservRepairError('Unexpected error occurred when'
                      ' probing the usbkey dev path; please check logs for'
                      ' details.')
426
427        mount_dst = self._get_default_usbkey_mount_path()
428        # Unmount if there is an existing stale mount.
429        self._unmount_drive(mount_dst)
430        # ChromeOS root fs is in /dev/sdx3
431        mount_src = usbkey_dev + '3'
432        try:
433            if not self._mount_drive(mount_src, mount_dst):
434                raise hosts.AutoservRepairError('Failed to extract firmware'
435                          ' image; Unable to mount %s.' % usbkey_dev,
436                          'unable to mount usbkey')
437            updater_bin = os.path.join(mount_dst,
438                                       'usr/sbin/chromeos-firmwareupdate')
439            self.run('%s --unpack %s' % (updater_bin, fw_dst))
440            return self._to_str(self.run('%s --manifest' % updater_bin).stdout)
441
442        finally:
443            self._unmount_drive(mount_dst)
444
445    def prepare_repair_firmware_image(self, fw_dst=None):
446        """Prepare firmware image on the servohost for auto repair process
447        to consume.
448
449        @param fw_dst: the path that we want to store firmware image on
450                       the servohost.
451
        @returns: A tuple that contains the ec firmware image path and bios
                  firmware image path on the servohost, or None if that type
                  of image is not available based on the manifest and the
                  DUT's model.
455        """
456        model = self.servo_model or self._dut_host_info.model
457        if not model:
            raise hosts.AutoservRepairError(
                      'Could not determine DUT\'s model.',
                      'model information unknown')
461
462        if not fw_dst:
463            fw_dst = '/tmp/firmware_image/%s' % self.servo_port
464        # Cleanup and re-create dst path to have a fresh start.
465        self.run('rm -rf %s' % fw_dst)
466        self.run('mkdir -p %s' % fw_dst)
467
468        manifest = json.loads(self._extract_firmware_image_from_usbkey(fw_dst))
        # For models that have a packed $MODEL_signed variant, we want to use
        # the 'signed' variant once we get DVT devices, so try to read the
        # manifest from $MODEL_signed first.
472        build = manifest.get('%s_signed' % model) or manifest.get(model)
473        if not build:
474            raise hosts.AutoservRepairError('Could not find firmware manifest'
475                      ' for model:%s' % model, 'model manifest not found')
476        try:
477            ec_image = os.path.join(fw_dst, build['ec']['image'])
478        except KeyError:
479            ec_image = None
480        try:
481            bios_image = os.path.join(fw_dst, build['host']['image'])
482        except KeyError:
483            bios_image = None
484        if not ec_image and not bios_image:
485            raise hosts.AutoservRepairError('Could not find any firmware image'
486                      ' for model:%s' % model, 'cannot find firmware image')
487        return ec_image, bios_image
488
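    # Usage sketch (illustrative; pairing this with
    # flash_ap_firmware_via_servo below is an assumption, the real repair
    # flow lives in servo_repair):
    #   ec_image, bios_image = host.prepare_repair_firmware_image()
    #   if bios_image:
    #       host.flash_ap_firmware_via_servo(bios_image)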
489    def flash_ap_firmware_via_servo(self, image):
490        """Flash AP firmware by use a provided image.
491
492        This is will be a short term enhanment for infra repair use, it use
493        'futility update' which will automatically determine various parameters
494        needed for flashrom, and will preserve the GBB, VPD, and HWID for
495        AP firmware update.
496        @TODO(xianuowang@) Remove this method once b/148403277 implemented.
497
498        @param image: the firmware image path on servohost.
499        """
500        cmd = 'futility update -i %s --servo_port=%s'
501        self.run(cmd % (image, self.servo_port), timeout=900)
502
503    def _probe_and_validate_usb_dev(self):
504        """This method probe the usb dev path by talking to servo, and then
505        validate the dev path is valid block device to servohost.
506        Possible output:
507        1. Encounter error during probe usb dev, returns empty string.
508        2. probe usb dev completed without error but cannot find usb dev,
509           raise AutoservRepairError.
510        3. probe usb dev find a usb dev path, but failed validation in this
511           method, raise AutoservRepairError.
512
513        @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
514        if unexpected error occurred during probe.
515        @raises: AutoservRepairError if servo couldn't probe the usb dev path
516        (servo.probe_host_usb_dev() returns empty string), or the dev path is
517        not valid block device to servohost.
518        """
519        logging.info('Validating image usbkey on servo.')
520        try:
521            usb_dev = self._servo.probe_host_usb_dev()
522        except Exception as e:
            # We don't want any unexpected or transient servo communication
            # failure to block usb repair, so capture all errors here.
525            logging.error(e, exc_info=True)
            logging.error('Unexpected error occurred while getting the usbkey'
                          ' dev path, skipping usbkey validation.')
528            return ''
529
530        if usb_dev:
            # probe_host_usb_dev() sometimes returns a stale record,
532            # so we need to make sure the path exists in fdisk.
533            validate_cmd = 'fdisk -l %s' % usb_dev
534            try:
535                resp = self.run(validate_cmd, ignore_status=True, timeout=30)
536                if resp.exit_status == 0:
537                    return usb_dev
538                logging.error('%s is reported from "image_usbkey_dev" control'
539                              ' but not detected by fdisk!', usb_dev)
540            except error.AutoservRunError as e:
541                if 'Timeout encountered' in str(e):
542                    logging.warning('Timeout encountered during fdisk run,'
543                                    ' skipping usbkey validation.')
544                    return ''
545                raise
546
547        raise hosts.AutoservRepairError(
548              'No usbkey detected on servo, the usbkey may be either missing'
549              ' or broken. Please replace usbkey on the servo and retry.',
550              'missing usbkey')
551
552    def is_ec_supported(self):
553        """Check if ec is supported on the servo_board"""
554        if self.servo_board:
555            if self._ec_supported is not None:
556                return self._ec_supported
557            try:
558                frm_config = config.Config(self.servo_board, self.servo_model)
559                self._ec_supported = getattr(frm_config, 'chrome_ec', False)
560                return self._ec_supported
561            except Exception as e:
                logging.error(
                        'Unexpected error when reading from firmware'
                        ' configs; %s', e)
565        else:
566            logging.debug('Cannot detect if DUT has EC as board unknown.')
567        return False
568
569    def validate_image_usbkey(self):
570        """This method first validate if there is a recover usbkey on servo
571        that accessible to servohost, and second check if a ChromeOS image is
572        already on the usb drive and return the image_name so we can avoid
573        unnecessary download and flash to the recover usbkey on servo.
574
575        Please note that, there is special error handling logic here:
576        1. If unexpected error happens, we return empty string. So repair
577           actions will not get blocked.
578        2. If no working usbkey present on servo, but no errors, we'll raise
579           AutoservRepairError here.
580
581        @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
582                  or empty string if no test image detected, or unexpected
583                  error occurred.
584        @raises:  AutoservRepairError if the usbkey is not detected on servo.
585        """
586        usb_dev = self._probe_and_validate_usb_dev()
587        if usb_dev:
588            return self.get_image_name_from_usbkey(usb_dev)
589        else:
590            return ''
591
592    def repair(self, silent=False):
593        """Attempt to repair servo host.
594
595        @param silent   If true, suppress logging in `status.log`.
596        """
597        message = 'Beginning repair for servo host %s port %s serial %s'
598        message %= (self.hostname, self.servo_port, self.servo_serial)
599        self.record('INFO', None, None, message)
600        try:
601            self._repair_strategy.repair(self, silent)
602            self._servo_state = servo_constants.SERVO_STATE_WORKING
603            self.record('INFO', None, None,
604                        'ServoHost repair set servo_state as WORKING')
605            # If target is a labstation then try to withdraw any existing
606            # reboot request created by this servo because it passed repair.
607            if self.is_labstation():
608                self.withdraw_reboot_request()
609        except Exception as e:
610            if not self.is_localhost():
611                self._servo_state = self.determine_servo_state()
612                self.record('INFO', None, None,
613                            'ServoHost repair set servo_state as %s'
614                            % self._servo_state)
615            if self._is_critical_error(e):
616                self.disconnect_servo()
617                self.stop_servod()
618                raise
619
620    def _is_critical_error(self, error):
621        if (isinstance(error, hosts.AutoservVerifyDependencyError)
622            and not error.is_critical()):
623            logging.warning('Non-critical verify failure(s) detected during'
624                            ' verify/repair servo, servo connection will'
625                            ' still be up but may not be fully functional.'
626                            ' Some repair actions and servo dependent'
627                            ' tests may not run.')
628            return False
        logging.info(
                'Critical verify failure(s) detected during repair/verify '
                'servo. Disconnecting servo and running `stop servod`; all'
                ' repair actions and tests that depend on servo will not '
                'run.')
634        return True
635
636    def get_servo(self):
637        """Get the cached servo.Servo object.
638
639        @return: a servo.Servo object.
640        @rtype: autotest_lib.server.cros.servo.servo.Servo
641        """
642        return self._servo
643
644    def request_reboot(self):
645        """Request servohost to be rebooted when it's safe to by touch a file.
646        """
647        logging.debug('Request to reboot servohost %s has been created by '
648                      'servo with port # %s', self.hostname, self.servo_port)
649        self.run('touch %s' % self._reboot_file, ignore_status=True)
650
651    def withdraw_reboot_request(self):
652        """Withdraw a servohost reboot request if exists by remove the flag
653        file.
654        """
        logging.debug('Withdrawing request to reboot servohost %s that was'
                      ' created by servo with port # %s, if it exists.',
                      self.hostname, self.servo_port)
658        self.run('rm -f %s' % self._reboot_file, ignore_status=True)
659
660    def start_servod(self, quick_startup=False):
661        """Start the servod process on servohost.
662        """
663        # Skip if running on the localhost.(crbug.com/1038168)
664        if self.is_localhost():
665            logging.debug("Servohost is a localhost, skipping start servod.")
666            return
667
668        if self.is_containerized_servod():
669            return self.start_containerized_servod()
670
671        cmd = 'start servod'
672        if self.servo_board:
673            cmd += ' BOARD=%s' % self.servo_board
674            if self.servo_model:
675                cmd += ' MODEL=%s' % self.servo_model
676        else:
677            logging.warning('Board for DUT is unknown; starting servod'
678                            ' assuming a pre-configured board.')
679
680        cmd += ' PORT=%d' % self.servo_port
681        if self.servo_serial:
682            cmd += ' SERIAL=%s' % self.servo_serial
683
684        # Start servod with dual_v4 based on servo_setup.
685        if self.is_dual_setup():
686            cmd += ' DUAL_V4=1'
687
        # Start servod with CONFIG=cr50.xml, which is required for some pools.
689        if self._require_cr50_servod_config():
690            cmd += ' CONFIG=cr50.xml'
691
692        if self.servo_recovery == True:
693            cmd += ' REC_MODE=1'
694
695        # Adding customized args if any.
696        if self.additional_servod_args:
697            cmd += ' ' + self.additional_servod_args
698
699        # Remove the symbolic links from the logs. This helps ensure that
700        # a failed servod instantiation does not cause us to grab old logs
701        # by mistake.
702        self.remove_latest_log_symlinks()
703        self.run(cmd, timeout=60)
704
705        # There's a lag between when `start servod` completes and when
706        # the _ServodConnectionVerifier trigger can actually succeed.
707        # The call to time.sleep() below gives time to make sure that
708        # the trigger won't fail after we return.
709
        # Normally servod on servo_v3 and labstation takes ~10 seconds to be
        # ready, but in the rare case that all servos on a labstation are in
        # heavy use it may take ~30 seconds. So the timeout values double
        # these numbers; we try the quick startup when first initializing the
        # servohost, and use the standard startup timeout in repair.
715        if quick_startup:
716            timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
717        else:
718            timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
        logging.debug('Wait %s seconds for servod process to be fully up.',
                      timeout)
720        time.sleep(timeout)
721        # Cache the initial instance timestamp to check against servod restarts
722        self._initial_instance_ts = self.get_instance_logs_ts()
723
724    def stop_servod(self):
725        """Stop the servod process on servohost.
726        """
727        # Skip if running on the localhost.(crbug.com/1038168)
728        if self.is_localhost():
729            logging.debug("Servohost is a localhost, skipping stop servod.")
730            return
731
732        if self.is_containerized_servod():
733            # TODO(gregorynisbet): Remove this message in 2022Q2.
734            logging.debug("ServoHost: Detected containerized servod.")
735            remove_container = True
736            if DOCKER_SERVOD_DEBUG_MODE == '1' or os.path.exists(
737                    SERVOD_DEBUG_FLAG):
738                remove_container = False
739            self.stop_containerized_servod(remove_container=remove_container)
740            return
741
742        logging.debug('Stopping servod on port %s', self.servo_port)
743        self.run('stop servod PORT=%d' % self.servo_port,
744                 timeout=60, ignore_status=True)
745        logging.debug('Wait %s seconds for servod process fully teardown.',
746                      servo_constants.SERVOD_TEARDOWN_TIMEOUT)
747        time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
748
749    def wait_for_init_servod_in_container(self, container):
750        """Waits for servod process to be ready to listen inside container."""
751        ready_output = "Instance associated with id %s ready" % self.servo_port
752        if not container:
753            logging.debug("Container object is None.")
754            return False
755        try:
756            # Executes servodtool command to wait for servod to be active.
757            exit_code, output = container.exec_run(
758                    cmd="servodtool instance wait-for-active -p %s" %
759                    self.servo_port,
760                    stdout=True)
761            # b/217780680, Make this compatible with python3,
762            if isinstance(output, bytes):
763                output = output.decode(errors='replace')
764            if exit_code != 0 or ready_output not in output:
765                logging.debug(
766                        'Failed to start servod process inside container,'
767                        'exit_code=%s, output=%s.', exit_code, output)
768                return False
769        except docker.errors.APIError as e:
770            logging.error('%s', e)
771        return True
772
773    def start_containerized_servod(self, with_servod=True):
774        """Start the servod process on servohost."""
775        logging.info("Starting servod container %s.",
776                     self.servod_container_name)
777        client = docker_utils.get_docker_client()
778        logging.debug("Docker deamon ping %s", client.ping())
779        labels = {'WITH_SERVOD': str(with_servod)}
780        try:
781            if self.is_up(with_servod=with_servod):
782                logging.warning("Container already exists - not starting")
783                return
784            logging.info(
785                    'Servod container either does not exist or is not running.'
786            )
787            self.stop_containerized_servod()
788        except docker.errors.APIError:
789            # Container exists but is not running
790            logging.info("Cleanup of non functional container.")
791            self.stop_containerized_servod()
792
793        label = os.environ.get("SERVOD_CONTAINER_LABEL", "release")
794        registry = os.environ.get("REGISTRY_URI", SERVOD_CONTAINER_IMAGE_PATH)
795        image = "%s/servod:%s" % (registry, label)
796        logging.info("Servod container image: %s", image)
797
798        try:
799            client.images.pull(image)
800        except docker.errors.APIError:
801            logging.exception("Failed to pull servod container image.")
802
803        environment = [
804                "BOARD=%s" % self.servo_board,
805                "MODEL=%s" % self.servo_model,
806                "SERIAL=%s" % self.servo_serial,
807                "PORT=%s" % self.servo_port,
808        ]
809        # Start servod with dual_v4 based on servo_setup.
810        if self.is_dual_setup():
811            environment.append("DUAL_V4=1")
        # Start servod with CONFIG=cr50.xml, which is required for some pools.
813        if self._require_cr50_servod_config():
814            environment.append("CONFIG=cr50.xml")
815        if self.servo_recovery == True:
816            environment.append("REC_MODE=1")
817
818        container_network = os.environ.get("DOCKER_DEFAULT_NETWORK", None)
        # In case the network environment variable is not set, fall back to
        # the default network for moblab or satlab based on the TLE.
821        if not container_network:
822            container_network = "default_moblab"
823            if 'drone' in docker_utils.get_running_containers(client=client):
824                container_network = "default_satlab"
825
826        logging.info('Servod container will use %s network', container_network)
827        logging.info('Servod container environment: %s', environment)
828        try:
829            start_cmds = ["bash", "/start_servod.sh"]
830            if not with_servod:
                # In some cases we need the container without a running servod.
832                start_cmds = ["tail", "-f", "/dev/null"]
833                logging.debug(
834                        'Attempting to start Servod container without servod')
835            container = client.containers.run(
836                    image,
837                    remove=False,
838                    privileged=True,
839                    labels=labels,
840                    name=self.servod_container_name,
841                    hostname=self.servod_container_name,
842                    network=container_network,
843                    cap_add=["NET_ADMIN"],
844                    detach=True,
845                    volumes=[
846                            "/dev:/dev",
847                            "%s_log:/var/log/servod_%s/" %
848                            (self.servod_container_name, self.servo_port)
849                    ],
850                    environment=environment,
851                    command=start_cmds,
852            )
            # Poll until the servod process inside the container is ready,
            # or the startup timeout is reached.
854            if with_servod:
855                current_time = 0
856                while not self.wait_for_init_servod_in_container(
857                        container
858                ) and current_time <= servo_constants.SERVOD_STARTUP_TIMEOUT:
859                    time.sleep(10)
860                    current_time += 10
861
862                if not self.wait_for_init_servod_in_container(container):
                    logging.info(
                            'Servod process is not up within the servod'
                            ' container after %s seconds.',
                            servo_constants.SERVOD_STARTUP_TIMEOUT)
                else:
                    logging.info(
                            'Servod process is up within the servod container'
                            ' after %s seconds.', current_time)
870            else:
871                logging.info(
872                        "Servod container %s up and running without servod process.",
873                        self.servod_container_name)
874
875        except docker.errors.ContainerError as e:
876            logging.exception("Failed to start servod container. %s", e)
877            raise
878        except docker.errors.ImageNotFound:
879            logging.exception("Servod container image %s not found.", image)
880
881    def stop_containerized_servod(self, remove_container=True):
882        """Stop the container running servod."""
883        logging.info("Stopping servod container %s.",
884                     self.servod_container_name)
885        client = docker_utils.get_docker_client()
886        try:
887            cont = client.containers.get(self.servod_container_name)
888        except docker.errors.NotFound:
889            logging.info("Servod container %s not found no need to stop it.",
890                         self.servod_container_name)
891        except docker.errors.APIError:
892            logging.exception(
893                    "Stopping servod container %s caused a docker error.",
894                    self.servod_container_name)
895        else:
            if remove_container:
897                cont.remove(force=True)
898                logging.debug('Servod container instance removed')
899
900    def restart_servod(self, quick_startup=False):
901        """Restart the servod process on servohost.
902        """
903        self.stop_servod()
904        self.start_servod(quick_startup)
905
906    def _process_servodtool_error(self, response):
907        """Helper function to handle non-zero servodtool response.
908        """
909        if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
910                     self._to_str(response.stdout)):
911            logging.error('The servo is not plugged on a usb hub that supports'
912                          ' power-cycle!')
913            # change the flag so we can update this label in later process.
914            self.smart_usbhub = False
915            return
916
917        if re.search(
918                servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
919                self.servo_serial, self._to_str(response.stdout)):
920            logging.error('No servo with serial %s found!', self.servo_serial)
921            return
922
923        logging.error('Unexpected error occurred from usbhub control, please'
924                      ' file a bug and inform chrome-fleet-software@ team!')
925
926    def get_main_servo_usb_path(self):
927        """Helper function to collect current usb-path to main servo.
928
        The usb-path is the path to the folder where the usb device was
        enumerated. On failure an empty string ('') is returned.
931
932        @returns: string, usb-path to the main servo device.
933            e.g.: '/sys/bus/usb/devices/1-6.1.3.1'
934        """
935        # TODO remove try-except when fix crbug.com/1087964
936        try:
937            cmd = 'servodtool device -s %s usb-path' % self.servo_serial
938            resp = self.run(cmd, ignore_status=True, timeout=30)
939        except Exception as e:
940            # Here we catch only timeout errors.
            # Other errors are filtered by ignore_status=True
942            logging.debug('Attempt to get servo usb-path failed due to '
943                          'timeout; %s', e)
944            return ''
945        if resp.exit_status != 0:
946            self._process_servodtool_error(resp)
947            return ''
948        usb_path = self._to_str(resp.stdout.strip())
949
950        logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
951        return usb_path
952
953    def _get_servo_usb_devnum(self):
954        """Helper function to collect current usb devnum of servo."""
955        usb_path = self.get_main_servo_usb_path()
956        if not usb_path:
957            return ''
958        resp = self.run('cat %s/devnum' % usb_path, ignore_status=True)
959        if resp.exit_status != 0:
960            self._process_servodtool_error(resp)
961            return ''
962        return self._to_str(resp.stdout.strip())
963
964    def reboot_servo_v3_on_need(self):
965        """Check and reboot servo_v3 based on below conditions.
966               1. If there is an update pending on reboot.
967               2. Servo_v3 has been up for more than 96 hours.
968        """
969        if self.get_board() != 'beaglebone_servo':
970            logging.info('Servo reboot is only applicable for servo V3.')
971            return
972
973        update_pending_reboot = (self._check_update_status() ==
974                                 self.UPDATE_STATE.PENDING_REBOOT)
975        uptime_hours = float(self.check_uptime())/3600
976        logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
977        long_up_time = uptime_hours > 96
978
        # Skip the reboot if neither condition is met.
980        if not (update_pending_reboot or long_up_time):
981            return
982
983        if update_pending_reboot:
984            message = 'Starting reboot servo_v3 because an update is pending.'
985            reboot_method = self._post_update_reboot
986        elif long_up_time:
987            message = 'Starting reboot servo_v3 because uptime > 96 hours.'
988            reboot_method = self._servo_host_reboot
989        self.record('INFO', None, None, message)
990        logging.info(message)
991        try:
992            reboot_method()
993            message = 'Servo_v3 reboot completed successfully.'
994        except Exception as e:
995            logging.debug("Fail to reboot servo_v3; %s", e)
996            message = ('Servo_v3 reboot failed, please check debug log '
997                       'for details.')
998        logging.info(message)
999        self.record('INFO', None, None, message)
1000
1001    def _reset_servo(self):
1002        logging.info('Resetting servo through smart usbhub.')
1003        # TODO remove try-except when fix crbug.com/1087964
1004        try:
1005            resp = self.run('servodtool device -s %s power-cycle' %
1006                            self.servo_serial, ignore_status=True,
1007                            timeout=30)
1008            if resp.exit_status != 0:
1009                self._process_servodtool_error(resp)
1010                return False
1011        except Exception as e:
1012            # Here we catch only timeout errors.
            # Other errors are filtered by ignore_status=True
1014            logging.debug('Attempt to reset servo failed due to timeout;'
1015                          ' %s', e)
1016            return False
1017
1018        logging.debug('Wait %s seconds for servo to come back from reset.',
1019                      servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
1020        time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
1021        # change the flag so we can update this label in later process.
1022        self.smart_usbhub = True
1023        return True
1024
1025    def reset_servo(self):
1026        """Reset(power-cycle) the servo via smart usbhub.
1027        """
1028        if not self.is_labstation():
1029            logging.info('Servo reset is not applicable to servo_v3.')
1030            return
1031
1032        pre_reset_devnum = self._get_servo_usb_devnum()
1033        logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
1034        result = self._reset_servo()
1035        if not result:
1036            message = ('Failed to reset servo with serial: %s. (Please ignore'
1037                       ' this error if the DUT is not connected to a smart'
1038                       ' usbhub).' % self.servo_serial)
1039            logging.warning(message)
1040            self.record('INFO', None, None, message)
1041            return
1042
1043        post_reset_devnum = self._get_servo_usb_devnum()
1044        logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
1045        if not (pre_reset_devnum and post_reset_devnum):
1046            message = ('Servo reset completed but unable to verify'
1047                       ' devnum change!')
1048        elif pre_reset_devnum != post_reset_devnum:
1049            message = ('Reset servo with serial %s completed successfully!'
1050                       % self.servo_serial)
1051        else:
1052            message = 'Servo reset completed but devnum is still not changed!'
1053        logging.info(message)
1054        self.record('INFO', None, None, message)
1055
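    # Note on reset_servo() above (a hedged explanation with hypothetical
    # values): a successful power-cycle re-enumerates the servo on USB, so
    # the kernel-assigned devnum normally changes, e.g. from '5' to '6'; an
    # unchanged devnum therefore suggests the power-cycle did not actually
    # happen.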
1056    def _extract_compressed_logs(self, logdir, relevant_files):
1057        """Decompress servod logs in |logdir|.
1058
1059        @param logdir: directory containing compressed servod logs.
1060        @param relevant_files: list of files in |logdir| to consider.
1061
1062        @returns: tuple, (tarfiles, files) where
1063                  tarfiles: list of the compressed filenames that have been
1064                            extracted and deleted
1065                  files:  list of the uncompressed files that were generated
1066        """
1067        # For all tar-files, first extract them to the directory, and
1068        # then let the common flow handle them.
1069        tarfiles = [cf for cf in relevant_files if
1070                    cf.endswith(self.COMPRESSION_SUFFIX)]
1071        files = []
1072        for f in tarfiles:
1073            norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
1074            with tarfile.open(f) as tf:
1075                # Each tarfile has only one member, as
1076                # that's the compressed log.
1077                member = tf.members[0]
1078                # Manipulate so that it only extracts the basename, and not
1079                # the directories etc.
1080                member.name = norm_name
1081                files.append(os.path.join(logdir, member.name))
1082                tf.extract(member, logdir)
1083            # File has been extracted: remove the compressed file.
1084            os.remove(f)
1085        return tarfiles, files
1086
1087    def _extract_mcu_logs(self, log_subdir):
1088        """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
1089
        Using the MCU_EXTRACTOR regex (above), extract and split out MCU
        console lines from the logs to generate individual console logs, e.g.
        after this method you can find an ec.txt and servo_v4.txt in |log_dir|
        if those MCUs had any console input/output.
1094
1095        @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
1096        """
1097        # Extract the MCU for each one. The MCU logs are only in the .DEBUG
1098        # files
1099        mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
1100        if not os.path.exists(mcu_lines_file):
1101            logging.info('No DEBUG logs found to extract MCU logs from.')
1102            return
1103        mcu_files = {}
1104        mcu_file_template = '%s.txt'
1105        with open(mcu_lines_file, 'r') as f:
1106            for line in f:
1107                match = self.MCU_EXTRACTOR.match(line)
1108                if match:
1109                    mcu = match.group(self.MCU_GROUP).lower()
1110                    # The |mcu| might contain a '/' in it. Replace it with '.'
1111                    # to avoid generating bad filepaths.
1112                    mcu = mcu.replace('/', '.')
1113                    line = match.group(self.LINE_GROUP)
1114                    if mcu not in mcu_files:
1115                        mcu_file = os.path.join(log_subdir,
1116                                                mcu_file_template % mcu)
1117                        mcu_files[mcu] = open(mcu_file, 'a')
1118                    fd = mcu_files[mcu]
1119                    fd.write(line + '\n')
1120        for f in mcu_files:
1121            mcu_files[f].close()
1122
1123    def remove_latest_log_symlinks(self):
1124        """Remove the conveninence symlinks 'latest' servod logs."""
1125        symlink_wildcard = '%s/latest*' % self.remote_log_dir
1126        cmd = 'rm ' + symlink_wildcard
1127        self.run(cmd, stderr_tee=None, ignore_status=True)
1128
1129    def probe_servod_restart(self, instance_ts, outdir):
1130        """Grab servod logs from previous instances if part of this session.
1131
        If servod crashed and restarted since the last time this host called
        start_servod(), this helper finds those logs as well, and stores them
        with the |OLD_LOG_SUFFIX| so they can be investigated if necessary.
1135
1136        It also issues a panicinfo command to servo devices after the restart
1137        to try and collect reboot information for debugging.
1138
1139        @param instance_ts: the log timestamp that the current instance uses
1140        @param outdir: directory to create a subdirectory into to place the
1141                       servod logs into.
1142        """
1143        if self._initial_instance_ts is None:
1144            logging.info('No log timestamp grabbed successfully on servod '
1145                         'startup. Cannot check device restarts. Ignoring.')
1146            return
1147        if instance_ts == self._initial_instance_ts:
1148            logging.debug('Servod appears to have run without restarting')
1149            return
1150        # Servod seems to have restarted (at least once). |_initial_instance_ts|
1151        # is the first timestamp, and instance_ts is the current timestamp. Find
1152        # all timestamps in between them, and grab the logs for each.
1153        tss = self._find_instance_timestamps_between(self._initial_instance_ts,
1154                                                     instance_ts)
1155        logging.info('Servod has restarted %d times between the start and the '
1156                     'end of this servo_host.', len(tss))
1157        logging.info('This might be an issue. Will extract all logs from each '
1158                     'instance.')
        logging.info('Logs that are not from the currently running (about to '
                     'be torn down) instance are marked with a .%s in their '
                     'folder.', self.OLD_LOG_SUFFIX)
1162        for ts in tss:
1163            self.get_instance_logs(ts, outdir, old=True)
1164        # Lastly, servod has restarted due to a potential issue. Try to get
1165        # panic information from servo micro and servo v4 for the current logs.
1166        # This can only happen if the |_servo| attribute is initialized.
1167        if self._servo:
1168            for mcu in ['servo_micro', 'servo_v4', 'servo_v4p1']:
1169                ctrl = '%s_uart_cmd' % mcu
1170                if self._servo.has_control(ctrl):
1171                    logging.info('Trying to retrieve %r panicinfo into logs',
1172                                 mcu)
1173                    try:
1174                        self._servo.set_nocheck(ctrl, 'panicinfo')
1175                    except error.TestFail as e:
1176                        logging.error('Failed to generate panicinfo for %r '
1177                                      'logs. %s', mcu, str(e))
1178
1179    def _find_instance_timestamps_between(self, start_ts, end_ts):
1180        """Find all log timestamps between [start_ts, end_ts).
1181
1182        @param start_ts: str, earliest log timestamp of interest
1183        @param end_ts: str, latest log timestamp of interest
1184
1185        @returns: list, all timestamps between start_ts and end_ts, end_ts
1186                  exclusive, on the servo_host. An empty list on errors
1187        """
        # Simply get all timestamps, then sort and deduplicate them.
1189        cmd = 'ls %s' % self.remote_log_dir
1190        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1191        if res.exit_status != 0:
1192            # Here we failed to find anything.
1193            logging.info('Failed to find remote servod logs. Ignoring.')
1194            return []
        # Decode before splitting so each entry is a str (b/217780680).
        logfiles = self._to_str(res.stdout).strip().split()
1196        timestamps = set()
1197        for logfile in logfiles:
1198            ts_match = self.TS_EXTRACTOR.match(logfile)
1199            if not ts_match:
1200                # Simply ignore files that fail the check. It might be the
1201                # 'latest' symlinks or random files.
1202                continue
1203            timestamps.add(ts_match.group(self.TS_GROUP))
1204        # At this point we have all unique timestamps.
1205        timestamps = sorted(timestamps)
1206        for ts in [start_ts, end_ts]:
1207            if ts not in timestamps:
1208                logging.error('Timestamp %r not in servod logs. Cannot query '
1209                              'for timestamps in between %r and %r', ts,
1210                              start_ts, end_ts)
1211                return []
1212        return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
1213
1214    def get_instance_logs_ts(self):
1215        """Retrieve the currently running servod instance's log timestamp
1216
1217        @returns: str, timestamp for current instance, or None on failure
1218        """
1219        # First, extract the timestamp. This cmd gives the real filename of
1220        # the latest aka current log file.
1221        cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
1222               'then realpath %(dir)s/latest.DEBUG;'
1223               'elif [ -f %(dir)s/latest ];'
1224               'then realpath %(dir)s/latest;'
1225               'else exit %(code)d;'
1226               'fi' % {'dir': self.remote_log_dir,
1227                       'code': self.NO_SYMLINKS_CODE})
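        # For illustration (assuming the default log layout and port 9999),
        # realpath would resolve the 'latest.DEBUG' symlink to something like
        # /var/log/servod_9999/log.2023-01-01--12-00-00.123.DEBUG; only the
        # basename's timestamp portion is used below.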
1228        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1229        if res.exit_status != 0:
1230            if res.exit_status == self.NO_SYMLINKS_CODE:
1231                logging.warning('servod log latest symlinks not found. '
1232                                'This is likely due to an error starting up '
1233                                'servod. Ignoring.')
1234            else:
1235                logging.warning('Failed to find servod logs on servo host.')
1236                logging.warning(res.stderr.strip())
1237            return None
1238        fname = os.path.basename(self._to_str(res.stdout.strip()))
1239        # Extract the timestamp from the filename using TS_EXTRACTOR.
1240        if isinstance(fname, bytes):
1241            fname = fname.decode("utf-8")
1242        ts_match = self.TS_EXTRACTOR.match(fname)
1243        if not ts_match:
1244            logging.warning('Failed to extract timestamp from servod log file '
1245                            '%r. Skipping. The servo host is using outdated '
1246                            'servod logging and needs to be updated.', fname)
1247            return None
1248        return ts_match.group(self.TS_GROUP)
1249
1250    def get_servohost_logs(self, outdir):
1251        """Get logs that can help debug servo/servod problems from
1252        the servohost.
1253
1254        @param outdir: directory to create a subdirectory into to place the
1255                       servod logs into.
1256        """
1257        log_dir = os.path.join(outdir, 'servohost_%s' % self.hostname)
1258        if os.path.isdir(log_dir):
1259            # In multi-DUT testing, each DUT may have its own servohost
1260            # instance, which could cause duplicate effort if they share the
1261            # same servohost, so just skip the collection if the log dir
1262            # already exists.
1263            logging.info(
1264                    'Skipping dmesg and messages log collection as %s'
1265                    ' already exists.', log_dir)
1266            return
1267        logging.info('Collecting dmesg and messages from servohost %s',
1268                     self.hostname)
1269        os.mkdir(log_dir)
1270        logging.info('Saving servohost logs to %s.', log_dir)
1271        # First collect dmesg from the servohost.
1272        crashcollect.collect_command(self, 'dmesg -H',
1273                                     os.path.join(log_dir, 'dmesg'))
1274        # Collect messages log from the servohost.
1275        if not self.is_containerized_servod():
1276            try:
1277                self.get_file('/var/log/messages', log_dir, try_rsync=False)
1278            except error.AutoservRunError:
1279                logging.warning(
1280                        'Failed to collect messages log from servohost.')
1281
1282    def get_servod_startup_log(self, outdir):
1283        """Get the servod startup log; this log is available even if servod
1284        did not start successfully.
1285
1286        @param outdir: directory to create a subdirectory into to place the
1287                       servod logs into.
1288        """
1289        if self.is_containerized_servod():
1290            return
1291        log_dir = os.path.join(outdir, 'servod_startup_%s' % self.servo_port)
1292        os.mkdir(log_dir)
1293        start_up_log = '/var/log/servod_%s.STARTUP.log' % self.servo_port
1294        try:
1295            self.get_file(start_up_log, log_dir, try_rsync=False)
1296        except error.AutoservRunError:
1297            logging.warning('Failed to collect servod startup log'
1298                            ' from servohost.')
1299
1300    def get_instance_logs(self, instance_ts, outdir, old=False):
1301        """Collect all logs with |instance_ts| and dump into a dir in |outdir|
1302
1303        This method first collects all logs on the servo_host side pertaining
1304        to this servod instance (port, instantiation). It glues them together
1305        into combined log.[level].txt files and extracts all available MCU
1306        console I/O from the logs into individual files e.g. servo_v4.txt
1307
1308        All the output can be found in a directory inside |outdir| that
1309        this generates based on |LOG_DIR|, the servod port, and the instance
1310        timestamp on the servo_host side.
1311
1312        @param instance_ts: log timestamp to grab logfiles for
1313        @param outdir: directory to create a subdirectory into to place the
1314                       servod logs into.
1315        @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
1316        """
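        # Illustrative output location (hypothetical values): for port 9999 and
        # instance timestamp 2023-01-01--12-00-00.123 this creates
        # <outdir>/servod_9999.2023-01-01--12-00-00.123 (with the old-log
        # suffix appended when |old| is True).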
1317        # Create the local results log dir.
1318        log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
1319                                                     str(self.servo_port),
1320                                                     instance_ts))
1321        if old:
1322            log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
1323        logging.info('Saving servod logs to %r.', log_dir)
1324        os.mkdir(log_dir)
1325        # Now, get all files with that timestamp.
1326        cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
1327                                                       instance_ts)
1328        res = self.run(cmd, stderr_tee=None, ignore_status=True)
1329        files = self._to_str(res.stdout.strip()).split()
1330        try:
1331            if self.is_containerized_servod():
1332                client = docker_utils.get_docker_client()
1333                container = client.containers.get(self.servod_container_name)
1334                for f in files:
1335                    file_stream, stat = container.get_archive(f)
1336                    tf = tempfile.NamedTemporaryFile(delete=False)
1337                    for block in file_stream:
1338                        tf.write(block)
1339                    tf.close()
1340                    pw_tar = tarfile.TarFile(tf.name)
1341                    pw_tar.extractall(log_dir)
1342                    os.remove(tf.name)
1343            else:
1344                self.get_file(files, log_dir, try_rsync=False)
1345
1346            if not os.listdir(log_dir):
1347                logging.info('No servod logs retrieved. Ignoring, and removing '
1348                             '%r again.', log_dir)
1349                os.rmdir(log_dir)
1350                return
1351        except docker.errors.NotFound:
1352            logging.info("Servod container %s not found; cannot retrieve its"
1353                         " logs.", self.hostname)
1354        except error.AutoservRunError as e:
1355            result = e.result_obj
1356            if result.exit_status != 0:
1357                stderr = result.stderr.strip()
1358                logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
1359                                stderr or '\n%s' % result)
1360            # Remove the log_dir as nothing was added to it.
1361            os.rmdir(log_dir)
1362            return
1363        local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
1364        # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
1365        for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
1366            # Create the joint files for each loglevel. i.e log.DEBUG
1367            joint_file = self.JOINT_LOG_PREFIX
1368            if level_name:
1369                joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
1370            # This helps with some online tools to avoid complaints about an
1371            # unknown filetype.
1372            joint_file = joint_file + '.txt'
1373            joint_path = os.path.join(log_dir, joint_file)
1374            files = [f for f in local_files if level_name in f]
1375            if not files:
1376                # TODO(crrev.com/c/1793030): remove no-level case once CL
1377                # is pushed
1378                continue
1379            # Extract compressed logs if any.
1380            compressed, extracted = self._extract_compressed_logs(log_dir,
1381                                                                  files)
1382            files = list(set(files) - set(compressed))
1383            files.extend(extracted)
1384            # Need to sort. As they all share the same timestamp, and
1385            # loglevel, the index itself is sufficient. The highest index
1386            # is the oldest file, therefore we need a descending sort.
1387            def sortkey(f, level=level_name):
1388                """Custom sortkey to sort based on the rotation number."""
1389                if f.endswith(level): return 0
1390                return int(f.split('.')[-1])
1391
1392            files.sort(reverse=True, key=sortkey)
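            # e.g. (illustrative) after sorting: ['log.<ts>.DEBUG.2',
            # 'log.<ts>.DEBUG.1', 'log.<ts>.DEBUG'] -- oldest rotation first,
            # so the joint file ends up in chronological order.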
1393            # Just rename the first file rather than building from scratch.
1394            os.rename(files[0], joint_path)
1395            with open(joint_path, 'a') as joint_f:
1396                for logfile in files[1:]:
1397                    # Transfer the file to the joint file line by line.
1398                    with open(logfile, 'r') as log_f:
1399                        for line in log_f:
1400                            joint_f.write(line)
1401                    # File contents have been copied over. Safe to delete.
1402                    os.remove(logfile)
1403            # Need to remove all files from |local_files| so we don't
1404            # analyze them again.
1405            local_files = list(set(local_files) - set(files) - set(compressed))
1406        # Lastly, extract MCU logs from the joint logs.
1407        self._extract_mcu_logs(log_dir)
1408
1409    def _lock(self):
1410        """lock servohost by touching a file.
1411        """
1412        logging.debug('Locking servohost %s by touching %s file',
1413                      self.hostname, self._lock_file)
1414        self.run('touch %s' % self._lock_file, ignore_status=True)
1415        self._is_locked = True
1416
1417    def _unlock(self):
1418        """Unlock servohost by removing the lock file.
1419        """
1420        logging.debug('Unlocking servohost by removing %s file',
1421                      self._lock_file)
1422        self.run('rm %s' % self._lock_file, ignore_status=True)
1423        self._is_locked = False
1424
1425    def close(self):
1426        """Close the associated servo and the host object."""
1427        # NOTE: throughout this method there are multiple attempts to catch
1428        # all errors. This is WAI as log grabbing should not fail tests.
1429        # However, the goal is to catch and handle/process all errors, thus
1430        # we print the traceback and ask for a bug.
1431        if self._closed:
1432            logging.debug('ServoHost is already closed.')
1433            return
1434
1435        # Only attempt ssh-related actions if the servohost is sshable. We call
1436        # check_cached_up_status() first because it's lightweight and returns
1437        # much faster when the servohost is down; however, we still want
1438        # to call is_up() later since check_cached_up_status() is a ping-based
1439        # check and does not guarantee the servohost is sshable.
1440        servo_host_ready = self.check_cached_up_status() and self.is_up()
1441
1442        # If dockerized servod is in use, we can start a new servod container
1443        # without the servod process for log collection.
1444        if (not self.is_localhost() and not servo_host_ready
1445                    and self.is_containerized_servod()):
1446            logging.info(
1447                    'Start servod container without servod for log collection.'
1448            )
1449            self.start_containerized_servod(with_servod=False)
1450            servo_host_ready = True
1451
1452        # TODO(crbug.com/1011516): once enabled, remove the check against
1453        # localhost and instead check against log-rotation enablement.
1454        should_collect_log = (servo_host_ready and self.job
1455                              and not self.is_localhost())
1456        if should_collect_log:
1457            instance_ts = self.get_instance_logs_ts()
1458        else:
1459            logging.info('Servohost is down, will skip servod log collection.')
1460            instance_ts = None
1461        servod_logs_available = instance_ts is not None
1462        if servod_logs_available:
1463            # Probe whether there was a servod restart, and grab those old
1464            # logs as well.
1465            try:
1466                self.probe_servod_restart(instance_ts, self.job.resultdir)
1467            except (error.AutoservRunError, error.TestFail) as e:
1468                logging.info('Failed to grab servo logs due to: %s. '
1469                             'This error is forgiven.', str(e))
1470            except Exception as e:
1471                logging.error('Unexpected error probing for old logs. %s. '
1472                              'Forgiven. Please file a bug and fix or catch '
1473                              'in log probing function', str(e),
1474                              exc_info=True)
1475        if self._servo:
1476            outdir = None if not self.job else self.job.resultdir
1477            # In some cases when we run as lab-tools, the job object is None.
1478            self._servo.close(outdir)
1479        try:
1480            if should_collect_log:
1481                self.get_servod_startup_log(self.job.resultdir)
1482                self.get_servohost_logs(self.job.resultdir)
1483            # Grab current (not old like above) logs after the servo instance
1484            # was closed out.
1485            if servod_logs_available:
1486                self.get_instance_logs(instance_ts, self.job.resultdir)
1487        except error.AutoservRunError as e:
1488            logging.info(
1489                    'Failed to grab servo logs due to: %s. '
1490                    'This error is forgiven.', str(e))
1491        except Exception as e:
1492            logging.error(
1493                    'Unexpected error grabbing servod logs. %s. '
1494                    'Forgiven. Please file a bug and fix or catch '
1495                    'in log grabbing function',
1496                    str(e),
1497                    exc_info=True)
1498
1499        if self._is_locked and servo_host_ready:
1500            # Remove the lock if the servohost has been locked.
1501            try:
1502                self._unlock()
1503            except error.AutoservSSHTimeout:
1504                logging.error('Failed to unlock servohost due to ssh timeout.'
1505                              ' It may be caused by the servohost going down'
1506                              ' during the task.')
1507        # We always want to stop servod after the task to minimize the impact of
1508        # a bad servod process interfering with other servods (see crbug.com/1028665).
1509        if servo_host_ready:
1510            try:
1511                self.stop_servod()
1512            except error.AutoservRunError as e:
1513                logging.info(
1514                        "Failed to stop servod due to:\n%s\n"
1515                        "This error is forgiven.", str(e))
1516
1517        super(ServoHost, self).close()
1518        # Mark closed.
1519        self._closed = True
1520
1521    def get_servo_state(self):
1522        return self._servo_state
1523
1524    def _get_host_metrics_data(self):
1525        return {'port': self.servo_port,
1526                'host': self.get_dut_hostname() or self.hostname,
1527                'board': self.servo_board or ''}
1528
1529    def is_servo_board_present_on_servo_v3(self):
1530        """Check if servo board is detected on servo_v3"""
1531        logging.debug('Started to detect servo board on servo_v3')
1532        vid_pids = ['18d1:5004', '0403:6014']
1533        not_detected = 'The servo board is not detected on servo_v3'
1534        try:
1535            cmd = 'lsusb | grep "%s"' % r"\|".join(vid_pids)
1536            result = self.run(cmd, ignore_status=True, timeout=30)
1537            if result.exit_status == 0 and self._to_str(result.stdout.strip()):
1538                logging.debug('The servo board is detected on servo_v3')
1539                return True
1540            logging.debug('%s; %s', not_detected, result)
1541            return False
1542        except Exception as e:
1543            # Can be triggered by a timeout issue while running the command.
1544            metrics.Counter(
1545                'chromeos/autotest/repair/servo_detection/timeout'
1546                ).increment(fields=self._get_host_metrics_data())
1547            logging.error('%s; %s', not_detected, str(e))
1548        return None
1549
1550    def _require_cr50_servod_config(self):
1551        """Check whether we need to start servod with CONFIG=cr50.xml."""
1552        dut_host_info = self.get_dut_host_info()
1553        if not dut_host_info:
1554            return False
1555        for pool in dut_host_info.pools:
1556            if pool.startswith(servo_constants.CR50_CONFIG_POOL_PREFIX):
1557                return True
1558        return False
1559
1560    def get_verifier_state(self, tag):
1561        """Return the state of servo verifier.
1562
1563        @returns: bool or None
1564        """
1565        return self._repair_strategy.verifier_is_good(tag)
1566
1567    def get_repair_strategy_node(self, tag):
1568        """Return the instance of verifier/repair node for host by tag.
1569
1570        @returns: _DependencyNode or None
1571        """
1572        return self._repair_strategy.node_by_tag(tag)
1573
1574    def determine_servo_state(self):
1575        """Determine servo state based on the failed verifier.
1576
1577        The state is determined from the first failed verifier or a combination
1578        of them.
1579        @returns: servo state value
1580        """
1581        ssh = self.get_verifier_state('connection')
1582        servo_root_present = self.get_verifier_state('servo_root_present')
1583        servo_root_present_node = self.get_repair_strategy_node(
1584                'servo_root_present')
1585        servo_v3_present = self.get_verifier_state('servo_v3_root_present')
1586        servo_fw = self.get_verifier_state('servo_fw')
1587        servo_fw_update = self.get_repair_strategy_node('servo_fw_update')
1588        servod_dut_controller_missing = self.get_repair_strategy_node(
1589                'servod_dut_controller_missing')
1590        disk_space = self.get_verifier_state('servo_disk_space')
1591        start_servod = self.get_verifier_state('start_servod')
1592        servod_started = self.get_verifier_state('servod_started')
1593        servod_echo = self.get_verifier_state('servod_echo')
1594        create_servo = self.get_verifier_state('servod_connection')
1595        init_servo = self.get_verifier_state('servod_control')
1596        cr50_low_sbu = self.get_verifier_state('servo_cr50_low_sbu')
1597        cr50_off = self.get_verifier_state('servo_cr50_off')
1598        servo_topology = self.get_verifier_state('servo_topology')
1599        dut_connected = self.get_verifier_state('servo_dut_connected')
1600        hub_connected = self.get_verifier_state('servo_hub_connected')
1601        pwr_button = self.get_verifier_state('servo_pwr_button')
1602        lid_open = self.get_verifier_state('servo_lid_open')
1603        ec_board = self.get_verifier_state('servo_ec_console')
1604        cr50_console = self.get_verifier_state('servo_cr50_console')
1605        ccd_testlab = self.get_verifier_state('servo_ccd_testlab')
1606
1607        if not ssh:
1608            return servo_constants.SERVO_STATE_NO_SSH
1609        if start_servod == hosts.VERIFY_FAILED:
1610            return servo_constants.SERVO_STATE_SERVO_HOST_ISSUE
1611        if servo_root_present == hosts.VERIFY_FAILED:
1612            if not self.servo_serial:
1613                return servo_constants.SERVO_STATE_WRONG_CONFIG
1614            if hasattr(servo_root_present_node, 'serial_mismatch'):
1615                return servo_constants.SERVO_STATE_SERIAL_MISMATCH
1616            return servo_constants.SERVO_STATE_NOT_CONNECTED
1617        if servo_v3_present == hosts.VERIFY_FAILED:
1618            # if we cannot find the required board on servo_v3
1619            return servo_constants.SERVO_STATE_NEED_REPLACEMENT
1620        if servo_fw == hosts.VERIFY_FAILED:
1621            logging.info(servo_fw_update)
1622            if hasattr(servo_fw_update, 'servo_updater_issue_detected'):
1623                return servo_constants.SERVO_STATE_SERVO_UPDATER_ISSUE
1624            return servo_constants.SERVO_STATE_NEED_REPLACEMENT
1625
1626        if dut_connected == hosts.VERIFY_FAILED:
1627            return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED
1628        if hub_connected == hosts.VERIFY_FAILED:
1629            logging.info('Servo HUB not connected')
1630            return servo_constants.SERVO_STATE_DUT_NOT_CONNECTED
1631
1632        if cr50_low_sbu == hosts.VERIFY_FAILED:
1633            return servo_constants.SERVO_STATE_SBU_LOW_VOLTAGE
1634        if cr50_off == hosts.VERIFY_FAILED:
1635            return servo_constants.SERVO_STATE_CR50_NOT_ENUMERATED
1636
1637        if servod_dut_controller_missing == hosts.VERIFY_FAILED:
1638            return servo_constants.SERVO_STATE_SERVOD_DUT_CONTROLLER_MISSING
1639        if servo_topology == hosts.VERIFY_FAILED:
1640            return servo_constants.SERVO_STATE_TOPOLOGY_ISSUE
1641
1642        # TODO(otabek@): detect special cases detected by pwr_button
1643        if dut_connected == hosts.VERIFY_SUCCESS:
1644            if pwr_button == hosts.VERIFY_FAILED:
1645                metrics.Counter(
1646                        'chromeos/autotest/repair/servo_unexpected/pwr_button2'
1647                ).increment(fields=self._get_host_metrics_data())
1648
1649        if (servod_started == hosts.VERIFY_FAILED
1650                    or servod_echo == hosts.VERIFY_FAILED):
1651            return servo_constants.SERVO_STATE_SERVOD_ISSUE
1652
1653        # One of the reasons why servo cannot be initialized.
1654        if cr50_console == hosts.VERIFY_FAILED:
1655            return servo_constants.SERVO_STATE_CR50_CONSOLE_MISSING
1656        if ccd_testlab == hosts.VERIFY_FAILED:
1657            return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1658
1659        if (create_servo == hosts.VERIFY_FAILED
1660                    or init_servo == hosts.VERIFY_FAILED):
1661            return servo_constants.SERVO_STATE_SERVOD_PROXY_ISSUE
1662
1663        if ec_board == hosts.VERIFY_FAILED:
1664            return servo_constants.SERVO_STATE_EC_BROKEN
1665        if pwr_button == hosts.VERIFY_FAILED:
1666            return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1667        if lid_open == hosts.VERIFY_FAILED:
1668            return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1669
1670        metrics.Counter(
1671            'chromeos/autotest/repair/unknown_servo_state'
1672            ).increment(fields=self._get_host_metrics_data())
1673        logging.info('We do not have special state for this failure yet :)')
1674        return servo_constants.SERVO_STATE_BROKEN
1675
1676    def is_servo_topology_supported(self):
1677        """Check if servo_topology is supported."""
1678        if self.is_containerized_servod():
1679            logging.info('Servod is running within a container.')
1680            return True
1681        if not self.is_up_fast():
1682            logging.info('Servo-Host is not reachable.')
1683            return False
1684        if not self.servo_serial:
1685            logging.info('Servo topology requires a servo serial.')
1686            return False
1687        if self.is_labstation():
1688            logging.info('Servod is running within labstation.')
1689            return True
1690        return False
1691
1692    def get_topology(self):
1693        """Get servo topology."""
1694        if not self._topology:
1695            self._topology = servo_topology.ServoTopology(self)
1696        return self._topology
1697
1698    def is_dual_setup(self):
1699        """Check if servo will run in dual setup.
1700
1701        Dual setup is used only for servo_v4 when ccd_cr50 and servo_micro are
1702        used at the same time.
1703        """
1704        return self.servo_setup == servo_constants.SERVO_SETUP_VALUE_DUAL_V4
1705
1706    def set_dut_health_profile(self, dut_health_profile):
1707        """
1708        @param dut_health_profile: A DeviceHealthProfile object.
1709        """
1710        logging.debug('setting dut_health_profile field to (%s)',
1711                      dut_health_profile)
1712        self._dut_health_profile = dut_health_profile
1713
1714    def get_dut_health_profile(self):
1715        """
1716        @return A DeviceHealthProfile object.
1717        """
1718        return self._dut_health_profile
1719
1720    def print_all_servo_of_host(self):
1721        """Print all servos detected on the host."""
1722        try:
1723            logging.info('\tDevices detected on the host:')
1724            devices = self.get_topology().get_list_available_servos()
1725            for device in devices:
1726                logging.info('\t%s', device)
1727        except Exception as e:
1728            logging.debug('(Not critical) Failed to list all servos: %s', e)
1729
1730
1731def make_servo_hostname(dut_hostname):
1732    """Given a DUT's hostname, return the hostname of its servo.
1733
1734    @param dut_hostname: hostname of a DUT.
1735
1736    @return hostname of the DUT's servo.
1737
1738    """
1739    host_parts = dut_hostname.split('.')
1740    host_parts[0] = host_parts[0] + '-servo'
1741    return '.'.join(host_parts)
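# Illustrative example (hypothetical DUT name):
#   make_servo_hostname('chromeos1-row1-rack1-host1.cros')
# returns 'chromeos1-row1-rack1-host1-servo.cros' -- the '-servo' suffix is
# appended to the first label only.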
1742
1743
1744def _map_afe_board_to_servo_board(afe_board):
1745    """Map a board we get from the AFE to a servo appropriate value.
1746
1747    Many boards are identical to other boards for servo's purposes.
1748    This function makes that mapping.
1749
1750    @param afe_board string board name received from AFE.
1751    @return board we expect servo to have.
1752
1753    """
1754    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1755    BOARD_MAP = {'gizmo': 'panther'}
1756    mapped_board = afe_board
1757    if afe_board in BOARD_MAP:
1758        mapped_board = BOARD_MAP[afe_board]
1759    else:
1760        for suffix in KNOWN_SUFFIXES:
1761            if afe_board.endswith(suffix):
1762                mapped_board = afe_board[0:-len(suffix)]
1763                break
1764    if mapped_board != afe_board:
1765        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1766    return mapped_board
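# Illustrative examples (based on the mapping above): 'gizmo' maps to
# 'panther' via BOARD_MAP, and 'samus-cheets' maps to 'samus' by stripping the
# known '-cheets' suffix; a plain board like 'eve' is returned unchanged.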
1767
1768
1769def get_servo_args_for_host(dut_host):
1770    """Return servo data associated with a given DUT.
1771
1772    @param dut_host   Instance of `Host` on which to find the servo
1773                      attributes.
1774    @return `servo_args` dict with host and an optional port.
1775    """
1776    info = dut_host.host_info_store.get()
1777    servo_args = {k: v for k, v in six.iteritems(info.attributes)
1778                  if k in servo_constants.SERVO_ATTR_KEYS}
1779
1780    if servo_constants.SERVO_HOST_SSH_PORT_ATTR in servo_args:
1781        try:
1782            servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR] = int(
1783                    servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR])
1784        except ValueError:
1785            logging.error('servo host port is not an int: %s',
1786                          servo_args[servo_constants.SERVO_HOST_SSH_PORT_ATTR])
1787            # Reset servo_args because we don't want to use an invalid port.
1788            servo_args.pop(servo_constants.SERVO_HOST_SSH_PORT_ATTR, None)
1789
1790    if servo_constants.SERVO_PORT_ATTR in servo_args:
1791        try:
1792            servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1793                servo_args[servo_constants.SERVO_PORT_ATTR])
1794        except ValueError:
1795            logging.error('servo port is not an int: %s',
1796                          servo_args[servo_constants.SERVO_PORT_ATTR])
1797            # Reset servo_args because we don't want to use an invalid port.
1798            servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
1799
1800    if info.board:
1801        servo_board = _map_afe_board_to_servo_board(info.board)
1802        servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
1803    if info.model:
1804        servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1805    return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
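# Illustrative return value (hypothetical attribute values): a dict such as
#   {servo_constants.SERVO_HOST_ATTR: 'chromeos1-row1-host1-servo',
#    servo_constants.SERVO_PORT_ATTR: 9999,
#    servo_constants.SERVO_BOARD_ATTR: 'panther'}
# or None when no servo host attribute is stored for the DUT.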
1806
1807
1808def _tweak_args_for_ssp_moblab(servo_args):
1809    if (servo_args[servo_constants.SERVO_HOST_ATTR]
1810            in ['localhost', '127.0.0.1']):
1811        servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
1812                'SSP', 'host_container_ip', type=str, default=None)
1813
1814
1815def create_servo_host(dut,
1816                      servo_args,
1817                      try_lab_servo=False,
1818                      try_servo_repair=False,
1819                      try_servo_recovery=True,
1820                      dut_host_info=None,
1821                      dut_health_profile=None):
1822    """Create a ServoHost object for a given DUT, if appropriate.
1823
1824    This function attempts to create and verify or repair a `ServoHost`
1825    object for a servo connected to the given `dut`, subject to various
1826    constraints imposed by the parameters:
1827      * When the `servo_args` parameter is not `None`, a servo
1828        host must be created, and must be checked with `repair()`.
1829      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1830        true:
1831          * If `try_servo_repair` is true, then create a servo host and
1832            check it with `repair()`.
1833          * Otherwise, if the servo responds to `ping` then create a
1834            servo host and check it with `verify()`.
1835
1836    In cases where `servo_args` was not `None`, repair failure
1837    exceptions are passed back to the caller; otherwise, exceptions
1838    are logged and then discarded.  Note that this only happens in cases
1839    where we're called from a test (not special task) control file that
1840    has an explicit dependency on servo.  In that case, we require that
1841    repair not write to `status.log`, so as to avoid polluting test
1842    results.
1843
1844    TODO(jrbarnette):  The special handling for servo in test control
1845    files is a thorn in my flesh; I dearly hope to see it cut out before
1846    my retirement.
1847
1848    Parameters for a servo host consist of a host name, port number, and
1849    DUT board, and are determined from one of these sources, in order of
1850    priority:
1851      * Servo attributes from the `dut` parameter take precedence over
1852        all other sources of information.
1853      * If a DNS entry for the servo based on the DUT hostname exists in
1854        the CrOS lab network, that hostname is used with the default
1855        port and the DUT's board.
1856      * If no other options are found, the parameters will be taken
1857        from the `servo_args` dict passed in from the caller.
1858
1859    @param dut:                 An instance of `Host` from which to take
1860                                servo parameters (if available).
1861    @param servo_args:          A dictionary with servo parameters to use if
1862                                they can't be found from `dut`.  If this
1863                                argument is supplied, unrepaired exceptions
1864                                from `verify()` will be passed back to the
1865                                caller.
1866    @param try_lab_servo:       If not true, servo host creation will be
1867                                skipped unless otherwise required by the
1868                                caller.
1869    @param try_servo_repair:    If true, check a servo host with
1870                                `repair()` instead of `verify()`.
1871    @param try_servo_recovery:  If true, start servod in recovery mode.
1872                                Default value is True.
1873    @param dut_host_info:       A HostInfo object of the DUT that connected
1874                                to this servo.
1875    @param dut_health_profile:  DUT repair info with history.
1876
1877    @returns: A ServoHost object or None. See comments above.
1878
1879    """
1880    # We explicitly check whether servo_args is None here (which means
1881    # servo is not needed), as servo_args == {} means servo is needed and
1882    # we expect to load servo_args from host_info_store.
1883    if servo_args is None:
1884        servo_dependency = False
1885        local_run = False
1886    else:
1887        servo_dependency = True
1888        # If servo_args is passed in directly, then this is a local test run.
1889        local_run = servo_constants.SERVO_HOST_ATTR in servo_args
1890
1891    if local_run:
1892        logging.warning('User-provided servo_args detected, will attempt'
1893                        ' to start servod and initialize the servo connection'
1894                        ' directly. All servo/servohost verify and repair'
1895                        ' steps will be skipped.')
1896
1897    # Loading servo args from host_info_store.
1898    if dut is not None and (try_lab_servo or servo_dependency):
1899        servo_args_override = get_servo_args_for_host(dut)
1900        if servo_args_override is not None:
1901            if utils.in_moblab_ssp():
1902                _tweak_args_for_ssp_moblab(servo_args_override)
1903            logging.debug(
1904                    'Overriding provided servo_args (%s) with arguments'
1905                    ' determined from the host (%s)',
1906                    servo_args,
1907                    servo_args_override,
1908            )
1909            servo_args = servo_args_override
1910
1911    if not servo_args:
1912        logging.debug('No servo_args provided, and failed to find overrides.')
1913        if try_lab_servo or servo_dependency:
1914            return None, servo_constants.SERVO_STATE_MISSING_CONFIG
1915        else:
1916            # For regular test cases which do not require servo.
1917            return None, None
1918
1919    servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1920    servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
1921    if not local_run:
1922        if not _is_servo_host_information_exist(servo_hostname, servo_port):
1923            logging.debug(
1924                    'Servo connection info is missing; hostname: %s, port: %s',
1925                    servo_hostname, servo_port)
1926            return None, servo_constants.SERVO_STATE_MISSING_CONFIG
1927        if not is_servo_host_information_valid(servo_hostname, servo_port):
1928            logging.debug(
1929                    'Servo connection info is invalid; hostname: %s, port: %s',
1930                    servo_hostname, servo_port)
1931            return None, servo_constants.SERVO_STATE_WRONG_CONFIG
1932
1933        if try_servo_recovery:
1934            servo_args[servo_constants.SERVO_RECOVERY_MODE] = True
1935
1936    newhost = ServoHost(**servo_args)
1937    if local_run:
1938        try:
1939            newhost.start_servod()
1940        except Exception:
1941            # If we failed to start servod here, we can assume servod
1942            # either already started or the test is running against a
1943            # non-standard servohost, so the user is responsible for ensuring
1944            # servod is running.
1945            pass
1946        try:
1947            newhost.initialize_servo()
1948            newhost.initialize_dut_for_servo()
1949            newhost._servo_state = servo_constants.SERVO_STATE_WORKING
1950            return newhost, newhost.get_servo_state()
1951        except Exception as e:
1952            logging.error('Failed to initialize servo. %s', e)
1953            return None, servo_constants.SERVO_STATE_BROKEN
1954
1955    if newhost.is_containerized_servod():
1956        # TODO(otabek@): Update for servod-manager.
1957        # Servod docker is not available for access.
1958        newhost.start_containerized_servod()
1959    # use_icmp is always set to true in autotest, but in labpack
1960    # this is determined by autoserv args. We need to make this consistent
1961    # across labpack and autotest b/205855910
1962    elif newhost.use_icmp and not newhost.is_up_fast(count=3):
1963        # ServoHost has internal check to wait if servo-host is in reboot
1964        # process. If servo-host still is not available this check will stop
1965        # further attempts as we do not have any option to recover servo_host.
1966        return None, servo_constants.SERVO_STATE_NO_SSH
1967
1968    # Reset or reboot servo device only during AdminRepair tasks.
1969    if try_servo_repair:
1970        if newhost._is_locked:
1971            # Print available servos on the host for debugging.
1972            newhost.print_all_servo_of_host()
1973            # Reset the servo if it is locked; the checks for whether the
1974            # servohost is up, is a labstation, and is in the lab are done
1975            # inside the locking logic.
1976            newhost.reset_servo()
1977        else:
1978            try:
1979                newhost.reboot_servo_v3_on_need()
1980            except Exception as e:
1981                logging.info('[Non-critical] Unexpected error while trying to'
1982                              ' reboot servo_v3, skipping the reboot; %s', e)
1983
1984    if dut:
1985        newhost.set_dut_hostname(dut.hostname)
1986    if dut_host_info:
1987        newhost.set_dut_host_info(dut_host_info)
1988    if (dut_health_profile and (try_lab_servo or try_servo_repair)):
1989        try:
1990            if newhost.is_localhost():
1991                logging.info('Servohost is a localhost, skip device'
1992                             ' health profile setup...')
1993            else:
1994                dut_health_profile.init_profile(newhost)
1995                newhost.set_dut_health_profile(dut_health_profile)
1996        except Exception as e:
1997            logging.info(
1998                    '[Non-critical] Unexpected error while trying to'
1999                    ' load device health profile; %s', e)
2000
2001    # Note that the logic of repair() includes everything done
2002    # by verify().  It's sufficient to call one or the other;
2003    # we don't need both.
2004    if servo_dependency:
2005        newhost.repair(silent=True)
2006        return newhost, newhost.get_servo_state()
2007
2008    if try_servo_repair:
2009        try:
2010            newhost.repair()
2011        except Exception:
2012            logging.exception('servo repair failed for %s', newhost.hostname)
2013    else:
2014        try:
2015            newhost.verify()
2016        except Exception:
2017            logging.exception('servo verify failed for %s', newhost.hostname)
2018    return newhost, newhost.get_servo_state()
2019
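# A minimal usage sketch (hypothetical host objects and hostnames). In the
# common lab flow, callers pass servo_args={} so attributes are loaded from
# the DUT's host_info_store:
#
#   servo_host, servo_state = create_servo_host(
#           dut=dut_host, servo_args={}, try_lab_servo=True,
#           try_servo_repair=True)
#   if servo_host is None:
#       logging.info('No usable servo; reported state: %s', servo_state)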
2020
2021def _is_servo_host_information_exist(hostname, port):
2022    if hostname is None or len(hostname.strip()) == 0:
2023        return False
2024    if port is None:
2025        return False
2026    if not isinstance(port, int):
2027        try:
2028            int(port)
2029        except ValueError:
2030            return False
2031    return True
2032
2033
2034def is_servo_host_information_valid(hostname, port):
2035    """Check if provided servo attributes are valid.
2036
2037    @param hostname Hostname of the servohost.
2038    @param port     servo port number.
2039
2040    @returns: A bool value to indicate if provided servo attribute valid.
2041    """
2042    if not _is_servo_host_information_exist(hostname, port):
2043        return False
2044    # Check that the port is within a valid range.
2045    port_int = int(port)
2046    if port_int < 1 or port_int > 65000:
2047        return False
2048    # We expect the hostname to contain only letters, digits, '-', '_', '.', ':'.
2049    if not re.match(r'[a-zA-Z0-9\-_.:]*$', hostname) or len(hostname) < 5:
2050        return False
2051    return True
2052
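# Illustrative check (hypothetical values): is_servo_host_information_valid(
# 'chromeos1-row1-host1-servo', 9999) passes, while a hostname shorter than
# five characters or a port outside 1..65000 is rejected.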