# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging, os, re
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error


class hardware_StorageWearoutDetect(test.test):
    """
    Check the wear out status of the storage device by scanning the storage
    info report (STORAGE_INFO_PATH) line by line.

    The device type is detected from the report: SATA, NVMe, or eMMC. For
    eMMC older than version 5.0 the wear out data is treated as not available
    and only an informational message is logged.

    The test fails if:
    - No SATA, eMMC or NVMe device can be detected in the report
    or
    - At least one SMART attribute is flagged as failing "NOW" (SSD_FAIL)
    or
    - SATA attribute 160 Uncorrectable_Error_Cnt or 187 Reported_Uncorrect
      has a non-zero raw value (SATA_FAIL)
    or
    - eMMC PRE_EOL_INFO is 0x02 (warning) or 0x03 (urgent) (MMC_FAIL)
    or
    - NVMe Available Spare is below the Available Spare Threshold reported
      on the line that immediately follows it.

    Note: this implementation does not evaluate the SMART "Percentage Used"
    value nor the eMMC DEVICE_LIFE_TIME_EST_TYP_A field.
    """

    version = 1
    STORAGE_INFO_PATH = '/var/log/storage_info.txt'
    STORAGE_INFO_COMMON_PATH = '/usr/share/misc/storage-info-common.sh'

    # Example "SATA Version is: SATA 3.1, 6.0 Gb/s (current: 6.0 Gb/s)"
    SATA_DETECT = r"SATA Version is:.*"

    # Example "   Extended CSD rev 1.7 (MMC 5.0)"
    # The dot in the version number is escaped so that only a real
    # "<major>.<minor>" string is captured and safely converted with float().
    MMC_DETECT = r"\s*Extended CSD rev.*MMC (?P<version>\d+\.\d+)"

    # Example "SMART/Health Information (NVMe Log 0x02, NSID 0xffffffff)"
    NVME_DETECT = r".*NVMe Log .*"

    # Field meaning and example line that have failing attribute
    # ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 184 End-to-End_Error        PO--CK   001   001   097    NOW  135
    # (verbose regex: must be matched with re.X)
    SSD_FAIL = r"""\s*(?P<param>\S+\s\S+)      # ID and attribute name
                   \s+[P-][O-][S-][R-][C-][K-] # flags
                   (\s+\d{3}){3}               # three 3-digits numbers
                   \s+NOW                      # fail indicator"""

    # We want to detect and fail if we see a non-zero value for either
    # attribute 160 Uncorrectable_Error_Cnt or attribute 187 Reported_Uncorrect
    # ID# ATTRIBUTE_NAME          FLAGS    VALUE WORST THRESH FAIL RAW_VALUE
    # 160 Uncorrectable_Error_Cnt -----    100   100   100     -   10
    # (verbose regex: must be matched with re.X)
    SATA_FAIL = r"""\s*(?P<param>(160\s+Uncorrectable_Error_Cnt|
                                  187\s+Reported_Uncorrect))
                    \s+[P-][O-][S-][R-][C-][K-]
                    (\s+\d{1,3}){3}
                    \s+(NOW|[-])
                    \s+[1-9][0-9]*"""

    # Ex "Pre EOL information [PRE_EOL_INFO: 0x02]"
    # 0x02 means Warning, consumed 80% of reserved blocks
    # 0x03 means Urgent
    MMC_FAIL = r".*(?P<param>PRE_EOL_INFO]?: 0x0[23])"

    # Ex "Available Spare:                    100%"
    # We want to fail when the available spare is below the
    # available spare threshold.
    NVME_SPARE = r"Available Spare:\s+(?P<param>\d{1,3})%"

    # Ex "Available Spare Threshold:          10%"
    NVME_THRESH = r"Available Spare Threshold:\s+(?P<param>\d{1,3})%"

    def _regenerate_storage_info(self):
        """
        Regenerate STORAGE_INFO_PATH by sourcing the common storage info
        script and running its get_storage_info function.

        @raises error.TestFail: if STORAGE_INFO_COMMON_PATH does not exist.
        """
        if not os.path.exists(self.STORAGE_INFO_COMMON_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_COMMON_PATH)

        cmd = '. %s; get_storage_info' % self.STORAGE_INFO_COMMON_PATH
        # Use a context manager so the output file is flushed and closed
        # before it is read back by run_once().
        with open(self.STORAGE_INFO_PATH, 'w') as storage_info:
            utils.run(cmd, stdout_tee=storage_info,
                      stderr_tee=utils.TEE_TO_LOGS)

    def run_once(self, use_cached_result=True):
        """
        Run the test

        @param use_cached_result: Use the result that generated when machine
                                  booted or generate new one.

        @raises error.TestFail: if the storage info file (or the script needed
                                to regenerate it) is missing, if no storage
                                device is detected, or if any wear out
                                indicator is found.
        """

        if not use_cached_result:
            self._regenerate_storage_info()

        # Check that storage_info file exist.
        if not os.path.exists(self.STORAGE_INFO_PATH):
            raise error.TestFail('Test failed with error: %s not exist'
                                 % self.STORAGE_INFO_PATH)

        mmc_detect = False
        sata_detect = False
        legacy_mmc = False
        nvme_detect = False
        mmc_version = None
        failures = []
        # Value of the last "Available Spare" line that is still waiting for
        # its threshold, which is expected on the very next line.
        pending_spare = None

        with open(self.STORAGE_INFO_PATH) as f:
            for line in f:
                if pending_spare is not None:
                    # Only the line right after "Available Spare" is
                    # considered for the threshold. The line is still run
                    # through every other check below, and a report ending
                    # with "Available Spare" no longer raises StopIteration.
                    nm = re.match(self.NVME_THRESH, line)
                    if nm:
                        thresh = nm.group('param')
                        if int(pending_spare) < int(thresh):
                            failures.append(
                                    'NVMe failure, Available Spare '
                                    + pending_spare + '% below threshold '
                                    + thresh + '%')
                    pending_spare = None

                if re.match(self.SATA_DETECT, line):
                    sata_detect = True
                    logging.info('Found SATA device')

                m = re.match(self.MMC_DETECT, line)
                if m:
                    mmc_version = m.group('version')
                    if float(mmc_version) < 5.0:
                        legacy_mmc = True
                    mmc_detect = True
                    logging.info('Found eMMC version %s', mmc_version)

                if re.match(self.NVME_DETECT, line):
                    nvme_detect = True
                    logging.info('Found NVMe device')

                m = re.match(self.SSD_FAIL, line, re.X)
                if m:
                    failures.append('SSD failure ' + m.group('param'))

                m = re.match(self.MMC_FAIL, line)
                if m:
                    failures.append('MMC failure ' + m.group('param'))

                m = re.match(self.SATA_FAIL, line, re.X)
                if m:
                    failures.append('SATA failure, attribute '
                                    + m.group('param'))

                m = re.match(self.NVME_SPARE, line)
                if m:
                    # Remember the spare value; it is compared against the
                    # threshold when the next line is processed.
                    pending_spare = m.group('param')

        if not sata_detect and not mmc_detect and not nvme_detect:
            raise error.TestFail('Can not detect storage device.')

        if failures:
            # A single failure yields the same message as before; several
            # failures are separated so each one stays readable.
            msg = 'Detected wearout parameter:%s' % '; '.join(failures)
            raise error.TestFail(msg)

        if legacy_mmc:
            msg = 'eMMC version %s detected. ' % mmc_version
            msg += 'Wearout attributes are supported in eMMC 5.0 and later.'
            logging.info(msg)