# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import subprocess
import threading
import time

# A path of the log file
_PATH_LOG_FILE = '/var/log/messages'
# Time margin as a number of seconds. It is used during parsing of the log file.
# Sometimes, logs are not in strict datetime order. This value defines maximum
# possible datetime error in logs sequence.
_TIME_MARGIN = 2
# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100
# A list of filters that are installed by loading components.
# Currently, they are Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']


class LogReader():
    """
    An instance of this class is responsible for gathering data about CUPS jobs
    and extracting their results and pipelines by parsing the main log file. It
    can check for internal errors during a job execution and build a bash script
    simulating an executed pipeline. CUPS server must have logging level set to
    'debug' to make a pipeline extraction possible.

    """

    def _read_last_lines(self, lines_count):
        """
        The function reads given number of lines from the end of the log file.
        The last line is omitted if it is not ended with EOL character. If a
        whole file was read, count of returned lines may be smaller than
        requested number. Always at least one line is returned; if the file has
        no EOL characters an exception is raised.

        @param lines_count: a number of lines to read

        @returns a list of lines (as strings)

        @raises Exception if the log file contains no complete lines

        """
        assert lines_count > 0
        # read one extra line so that a possibly incomplete first line
        # returned by tail can be dropped below
        argv = ['tail', '-n', '%d' % (lines_count+1), _PATH_LOG_FILE]
        p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
        out, err = p1.communicate()

        # It is possible for invalid UTF-8 to appear in the system log
        # (e.g. null bytes on unclean poweroff), but this doesn't
        # concern us, so we elect to ignore it.
        lines = out.decode(errors="ignore").split('\n')
        # drop the element after the final EOL: either an empty string (file
        # ended with a newline) or an unterminated trailing line
        lines.pop()
        # FIX: this check was previously nested under the trimming branch
        # below and therefore unreachable; an empty file (or one with no EOL
        # characters) silently returned an empty list instead of raising, as
        # the docstring promises.
        if len(lines) == 0:
            raise Exception('The file %s is empty' % _PATH_LOG_FILE)
        if len(lines) > lines_count:
            lines = lines[1:]
        return lines


    def _get_datetime_from_line(self, line):
        """
        Parse datetime from the given line read from the log file.

        @param line: a single line from the log file; it must start with a
                timestamp like "2018-06-01T12:30:45." (first 20 characters)

        @returns a datetime as a number of seconds from the epoch (float)

        """
        # the first 20 characters cover the date, 'T', the time and the dot
        # that precedes the fractional seconds
        return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))


    def __init__(self):
        """
        Constructor.

        """
        # the last line read from the log file; used as a bookmark so that
        # subsequent parses only process newer lines
        self._last_line_read = self._read_last_lines(1)[0]
        # logs obtained from the log file after preprocessing
        # key = a tuple (CUPS pid, job id)
        # value = list with logs
        self._logs = dict()
        # data extracted from logs
        # key = a tuple (CUPS pid, job id)
        # value = a tuple (result(bool),envp(dict),argv(list),filters(list))
        self._results = dict()
        # mapping from printer's name to a list of CUPS jobs
        # key = printer name
        # value = a list of tuples (CUPS pid, job id)
        self._printer_name_to_key = dict()
        # a lock for synchronization of access to the dictionaries above
        self._critical_section = threading.Lock()


    def _parse_logs(self):
        """
        This method reads and parses new lines from the log file (all complete
        lines after the line saved in self._last_line_read). The lines are
        filtered and saved in self._logs. Dictionaries self._results and
        self._printer_name_to_key are updated accordingly.

        @raises Exception if the previously read line cannot be located in
                the log file anymore (e.g. the log has been rotated/erased)

        """
        # ==============================================
        # try to read lines from the end of the log file until the line from
        # self._last_line_read is found
        dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
        lines_count = 1024
        pos_last_line_read = None
        while True:
            lines = self._read_last_lines(lines_count)
            # search for self._last_line_read in lines read from the file
            for index, line in enumerate(lines):
                dt_line = self._get_datetime_from_line(line)
                if dt_line > dt_last_line_read + _TIME_MARGIN:
                    # this line is already newer than the searched one (even
                    # allowing for _TIME_MARGIN of clock skew in the log), so
                    # the searched line cannot occur later - exit the loop
                    break
                if line == self._last_line_read:
                    # the line was found, save its index and exit the loop
                    pos_last_line_read = index
                    break
            # if the self._last_line_read was found, exit the 'while' loop
            if pos_last_line_read is not None:
                break
            # check if we can try to read more lines from the file to repeat
            # the search, if not then throw an exception
            if ( len(lines) < lines_count or
                    self._get_datetime_from_line(lines[0]) +
                    _TIME_MARGIN < dt_last_line_read ):
                raise Exception('Cannot find the last read line in the log'
                        ' file (maybe it has been erased?)')
            # increase the number of lines to read
            lines_count = lines_count * 2
        # check if there are any new lines, exit if not
        if pos_last_line_read + 1 == len(lines):
            return
        # update the field with the last line read from the log file
        self._last_line_read = lines[-1]
        # ==============================================
        # parse logs, select these ones from CUPS jobs and save them to
        # self._logs
        updated_jobs = set()
        for line in lines[pos_last_line_read+1:]:
            # expected format:
            # "<datetime> <host> cupsd[<pid>]: [Job <id>] <message>"
            tokens = line.split(' ', 5)
            if ( len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
                    tokens[3] != '[Job' ):
                continue
            pid = int(tokens[2][6:-2])
            job_id = int(tokens[4][:-1])
            message = tokens[5]
            key = (pid, job_id)
            if key not in self._logs:
                self._logs[key] = []
            self._logs[key].append(message)
            updated_jobs.add(key)
        # ==============================================
        # try to interpret logs
        for key in updated_jobs:
            number_of_processes_to_check = None
            printer_name = None
            result = True
            envp = dict()
            argv = []
            filters = []
            for msg in self._logs[key]:
                if number_of_processes_to_check is None:
                    # phase 1: wait for the message with the number of
                    # filters; one more process (the backend) is also checked
                    regexp = re.match(r'(\d) filters for job:', msg)
                    if regexp is not None:
                        number_of_processes_to_check = int(regexp.group(1)) + 1
                        assert number_of_processes_to_check > 0
                elif printer_name is None:
                    # phase 2: collect filter names until the final target
                    # ("printer/<name>") reveals the printer's name
                    regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
                    if regexp is not None:
                        fltr = regexp.group(1)
                        if fltr == '-':
                            # '-' means a skipped filter; one process less
                            number_of_processes_to_check -= 1
                        else:
                            filters.append(fltr)
                        trg = regexp.group(2)
                        if trg.startswith('printer/') and '/' not in trg[8:]:
                            printer_name = trg[8:]
                else:
                    # phase 3: collect envp/argv entries and the exit status
                    # of every filter/backend process
                    regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
                    if regexp is not None:
                        envp[regexp.group(1)] = regexp.group(2)
                        continue
                    regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
                    if regexp is not None:
                        # argv entries must arrive in order, without gaps
                        if len(argv) != int(regexp.group(1)):
                            raise Exception('Error when parsing argv for %s'
                                    % printer_name)
                        argv.append(regexp.group(2))
                        continue
                    if number_of_processes_to_check > 0:
                        regexp = re.match(r'PID \d+ \(\S+\) ', msg)
                        if regexp is not None:
                            number_of_processes_to_check -= 1
                            if ( not msg.endswith(' exited with no errors.') and
                                    'was terminated normally with signal'
                                    not in msg ):
                                result = False
            # the job is complete only when every process has been checked
            if number_of_processes_to_check == 0:
                # if it is a new job, update self._printer_name_to_key
                if key not in self._results:
                    if printer_name not in self._printer_name_to_key:
                        self._printer_name_to_key[printer_name] = [key]
                    else:
                        self._printer_name_to_key[printer_name].append(key)
                # update results
                self._results[key] = (result, envp, argv, filters)


    def _build_pipeline(
            self, printer_name, envp, argv, filters,
            path_ppd, path_doc, path_workdir):
        """
        This function tries to build a bash script containing a pipeline used
        internally by CUPS. Each filter's stderr is saved to "<i>.err" and its
        output is compressed to "<i>.doc.gz" in the current directory.

        @param printer_name: a printer's name
        @param envp: a dictionary with environment variables for the pipeline;
                PPD, HOME and TMPDIR entries are overwritten in place
        @param argv: a list with command line parameters for filters; the last
                element (an input file) is removed in place
        @param filters: a list of filters; the last element may be replaced in
                place by a full path for component-based drivers
        @param path_ppd: a path to PPD file (string)
        @param path_doc: a path to input document (string)
        @param path_workdir: a path to a directory, that should be used as home
                directory and temporary location by filters

        @returns a pipeline as a bash script (string)

        """
        # update paths set in environment variables
        envp['PPD'] = path_ppd
        envp['HOME'] = path_workdir
        envp['TMPDIR'] = path_workdir
        # drop the last argument, it is an input file
        argv.pop()
        # replace filter names by full path for Star and Epson drivers
        if filters[-1] in _COMPONENT_FILTERS:
            find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
            find_cmd.append(filters[-1])
            # FIX: check_output() returns bytes on Python 3; it must be
            # decoded before being embedded into the script with %s, otherwise
            # the command would contain a literal "b'...'" representation
            filters[-1] = subprocess.check_output(find_cmd).decode().rstrip()
        # build and return the script
        script = '#!/bin/bash\nset -e\nset -o pipefail\n'
        for name, value in envp.items():
            script += ('export %s=%s\n' % (name, value))
        for ind, filt in enumerate(filters):
            # every filter but the first reads the previous filter's output
            if ind > 0:
                script += ('gzip -dc %d.doc.gz | ' % ind)
            # 'exec -a' sets argv[0] as CUPS does for its filters
            script += ('(exec -a "%s" %s' % (argv[0], filt))
            for arg in argv[1:]:
                script += (' "%s"' % arg)
            if ind == 0:
                script += (' "%s"' % path_doc)
            script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind+1, ind+1))
        return script


    def extract_result(
            self, printer_name,
            path_ppd=None, path_doc=None, path_workdir=None):
        """
        This function extracts from the log file CUPS logs related to printer
        with given name. Extracted logs are also interpreted, the function
        checks for any failures. If all optional parameters are set, a pipeline
        used by CUPS is extracted from the logs. If some of required results
        cannot be extracted, an exception is raised. This function is
        thread-safe, it can be called simultaneously by many Python threads.
        However, all simultaneous calls must have different values of the
        printer_name parameter (calls with the same printer_name must be done
        sequentially).

        @param printer_name: a printer's name
        @param path_ppd: a path to PPD file (string), required for pipeline
                extraction only
        @param path_doc: a path to input document (string), required for
                pipeline extraction only
        @param path_workdir: a path to a directory, that should be used as home
                directory and temporary location by filters, required for
                pipeline extraction only

        @returns a tuple of three variables:
                1. True if no errors were detected, False otherwise
                2. logs extracted from the log file (string)
                3. pipeline as a bash script (string), None if not requested

        @raises Exception in case of any errors

        """
        # time when we started efforts to get logs
        time_start = time.time()
        # all three parameters must be set to extract a pipeline
        pipeline_required = (path_ppd is not None and path_doc is not None
                and path_workdir is not None)
        try:
            # try to get and parse logs
            while True:
                with self._critical_section:
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                    # save current time and try to parse the log file
                    time_last_read = time.time()
                    self._parse_logs()
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                # raise an exception if we have been waiting too long
                if time_last_read - time_start > _TIME_MAX_WAIT:
                    raise Exception('Cannot parse logs for %s or they did not'
                            ' appear in %s within %d seconds from printing the'
                            ' document' % (printer_name, _PATH_LOG_FILE,
                            _TIME_MAX_WAIT))
                # wait 1 second and try again to parse acquired logs
                time.sleep(1)
            # retrieve required data
            with self._critical_section:
                if pipeline_required:
                    # key, envp, argv are already set in the loop above
                    filters = self._results[key][3]
                else:
                    key = self._printer_name_to_key[printer_name][0]
                no_errors = self._results[key][0]
                logs = '\n'.join(self._logs[key])
            # build a pipeline script if required
            if pipeline_required:
                pipeline = self._build_pipeline(printer_name, envp, argv,
                        filters, path_ppd, path_doc, path_workdir)
            else:
                pipeline = None
            # return results
            return no_errors, logs, pipeline
        finally:
            # cleanup: drop this job's data; calls with the same printer_name
            # are sequential (see docstring), so the membership test is safe
            if printer_name in self._printer_name_to_key:
                with self._critical_section:
                    key = self._printer_name_to_key[printer_name].pop(0)
                    if len(self._printer_name_to_key[printer_name]) == 0:
                        self._printer_name_to_key.pop(printer_name)
                    self._logs.pop(key)
                    self._results.pop(key)