# Lint as: python2, python3
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging, os, re
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error


class hardware_StorageWearoutDetect(test.test):
    """
    Check wear out status for storage devices listed in the storage info
    report: SMART output for SATA SSD and NVMe, ext_csd output for eMMC.
    eMMC older than version 5.0 is treated as data not available.

    The test will be failed if:
    - No SATA, eMMC or NVMe device is found in the report
    or
    - At least one SMART attribute is flagged as failing now (value under
      its threshold)
    or
    - SMART attribute 160 Uncorrectable_Error_Cnt or 187 Reported_Uncorrect
      has a non-zero raw value
    or
    - eMMC PRE_EOL_INFO is 0x02 (warning) or 0x03 (urgent)
    or
    - NVMe Available Spare is below Available Spare Threshold

    NOTE(review): earlier documentation of this test also listed
    "Percentage Used above 90" and the eMMC DEVICE_LIFE_TIME_EST_TYP_A band
    as failure conditions; no pattern in this file checks either one.
    """

    version = 1
    STORAGE_INFO_PATH = '/var/log/storage_info.txt'
    STORAGE_INFO_COMMON_PATH = '/usr/share/misc/storage-info-common.sh'

    # Example "SATA Version is: SATA 3.1, 6.0 Gb/s (current: 6.0 Gb/s)"
    SATA_DETECT = r"SATA Version is:.*"

    # Example "   Extended CSD rev 1.7 (MMC 5.0)"
    # The dot is escaped so that only a real decimal version is captured and
    # the float() conversion of the captured text cannot fail.
    MMC_DETECT = r"\s*Extended CSD rev.*MMC (?P<version>\d+\.\d+)"

    # Example "SMART/Health Information (NVMe Log 0x02, NSID 0xffffffff)"
    NVME_DETECT = r".*NVMe Log .*"

    # Field meaning and example line that have failing attribute
    # ID# ATTRIBUTE_NAME     FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 184 End-to-End_Error   PO--CK   001   001   097    NOW  135
    # Verbose (re.X) pattern: whitespace and '#' comments are ignored.
    SSD_FAIL = r"""\s*(?P<param>\S+\s\S+)      # ID and attribute name
                   \s+[P-][O-][S-][R-][C-][K-] # flags
                   (\s+\d{3}){3}               # three 3-digits numbers
                   \s+NOW                      # fail indicator"""

    # We want to detect and fail if we see a non-zero value for either
    # attribute 160 Uncorrectable_Error_Cnt or attribute 187 Reported_Uncorrect
    # ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 160 Uncorrectable_Error_Cnt ------   100   100   100    -    10
    # Verbose (re.X) pattern: whitespace inside it is ignored.
    SATA_FAIL = r"""\s*(?P<param>(160\s+Uncorrectable_Error_Cnt|
                                  187\s+Reported_Uncorrect))
                    \s+[P-][O-][S-][R-][C-][K-]
                    (\s+\d{1,3}){3}
                    \s+(NOW|[-])
                    \s+[1-9][0-9]*"""

    # Ex "Pre EOL information [PRE_EOL_INFO: 0x02]"
    # 0x02 means Warning, consumed 80% of reserved blocks
    # 0x03 means Urgent
    MMC_FAIL = r".*(?P<param>PRE_EOL_INFO]?: 0x0[23])"

    # Ex Available Spare:                    100%
    # We want to fail when the available spare is below the
    # available spare threshold.
    NVME_SPARE = r"Available Spare:\s+(?P<param>\d{1,3})%"

    # Ex Available Spare Threshold:          10%
    NVME_THRESH = r"Available Spare Threshold:\s+(?P<param>\d{1,3})%"

    def _generate_storage_info(self):
        """
        Regenerate the report at STORAGE_INFO_PATH.

        Sources STORAGE_INFO_COMMON_PATH in a shell, runs get_storage_info
        and writes its stdout to STORAGE_INFO_PATH.

        @raises error.TestFail: if STORAGE_INFO_COMMON_PATH does not exist.
        """
        if not os.path.exists(self.STORAGE_INFO_COMMON_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_COMMON_PATH)

        cmd = ' '.join(['. %s;' % (self.STORAGE_INFO_COMMON_PATH, ),
                        'get_storage_info'])
        # Close the report before it is read back, so that everything the
        # command wrote is flushed and the handle is not leaked.
        with open(self.STORAGE_INFO_PATH, 'w') as storage_info:
            utils.run(cmd, stdout_tee=storage_info,
                      stderr_tee=utils.TEE_TO_LOGS)

    def _scan_storage_info(self, lines):
        """
        Scan report lines for storage devices and wear out indicators.

        @param lines: iterable of text lines of the storage info report.

        @returns tuple (device_found, legacy_mmc_version, failures):
                 device_found is True if a SATA, eMMC or NVMe device line
                 was seen; legacy_mmc_version is the version string of the
                 last eMMC older than 5.0 that was seen, or None; failures
                 is a list of failure descriptions in the order found.
        """
        # A single iterator is shared by the loop and the look-ahead below.
        lines = iter(lines)
        device_found = False
        legacy_mmc_version = None
        failures = []

        for line in lines:
            if re.match(self.SATA_DETECT, line):
                device_found = True
                logging.info('Found SATA device')

            m = re.match(self.MMC_DETECT, line)
            if m:
                mmc_version = m.group('version')
                if float(mmc_version) < 5.0:
                    legacy_mmc_version = mmc_version
                device_found = True
                logging.info('Found eMMC version %s', mmc_version)

            if re.match(self.NVME_DETECT, line):
                device_found = True
                logging.info('Found NVMe device')

            m = re.match(self.SSD_FAIL, line, re.X)
            if m:
                failures.append('SSD failure ' + m.group('param'))

            m = re.match(self.MMC_FAIL, line)
            if m:
                failures.append('MMC failure ' + m.group('param'))

            m = re.match(self.SATA_FAIL, line, re.X)
            if m:
                failures.append('SATA failure, attribute ' + m.group('param'))

            m = re.match(self.NVME_SPARE, line)
            if m:
                # Check the next line for the available spare threshold.
                # Fail if available spare is below the threshold.
                # The '' default keeps a report that ends right after the
                # spare line from raising StopIteration.
                spare = m.group('param')
                nm = re.match(self.NVME_THRESH, next(lines, ''))
                if nm:
                    thresh = nm.group('param')
                    if int(spare) < int(thresh):
                        failures.append(
                                'NVMe failure, Available Spare %s%% below '
                                'threshold %s%%' % (spare, thresh))

        return device_found, legacy_mmc_version, failures

    def run_once(self, use_cached_result=True):
        """
        Run the test

        @param use_cached_result: If True, read the existing report at
                                  STORAGE_INFO_PATH (the result that was
                                  generated when the machine booted); if
                                  False, generate a new one first.

        @raises error.TestFail: if a required file is missing, no storage
                                device is detected, or a wear out indicator
                                is found.
        """
        if not use_cached_result:
            self._generate_storage_info()

        # Check that storage_info file exist.
        if not os.path.exists(self.STORAGE_INFO_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_PATH)

        with open(self.STORAGE_INFO_PATH) as f:
            device_found, legacy_mmc_version, failures = (
                    self._scan_storage_info(f))

        if not device_found:
            raise error.TestFail('Can not detect storage device.')

        if failures:
            # Separate multiple findings so the message stays readable; a
            # single finding produces the same text as before.
            raise error.TestFail('Detected wearout parameter:%s'
                                 % '; '.join(failures))

        if legacy_mmc_version is not None:
            msg = 'eMMC version %s detected. ' % legacy_mmc_version
            msg += 'Wearout attributes are supported in eMMC 5.0 and later.'
            logging.info(msg)