# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import subprocess
import threading
import time

# A path to the log file.
_PATH_LOG_FILE = '/var/log/messages'
# Time margin as a number of seconds. It is used during parsing of the log
# file. Sometimes, logs are not in strict datetime order. This value defines
# the maximum possible datetime error in the logs sequence.
_TIME_MARGIN = 2
# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100
# A list of filters that are installed by loading components.
# Currently, they are Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']


class LogReader():
    """
    An instance of this class is responsible for gathering data about CUPS
    jobs and extracting their results and pipelines by parsing the main log
    file. It can check for internal errors during job execution and build a
    bash script simulating an executed pipeline. The CUPS server must have
    its logging level set to 'debug' to make pipeline extraction possible.

    """

    def _read_last_lines(self, lines_count):
        """
        Reads the given number of lines from the end of the log file. The
        last line is omitted if it does not end with an EOL character. If the
        whole file was read, the count of returned lines may be smaller than
        the requested number. At least one line is always returned; if the
        file has no EOL characters, an exception is raised.

        @param lines_count: a number of lines to read

        @returns a list of lines

        """
        assert lines_count > 0
        argv = ['tail', '-n', '%d' % (lines_count + 1), _PATH_LOG_FILE]
        p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
        out, err = p1.communicate()
        lines = out.split('\n')
        # drop the last element: it is either an empty string (when the file
        # ends with EOL) or an incomplete line
        lines.pop()
        if len(lines) == 0:
            raise Exception('The file %s is empty' % _PATH_LOG_FILE)
        # 'tail' was asked for one extra line; drop it if it was returned
        if len(lines) > lines_count:
            lines = lines[1:]
        return lines


    def _get_datetime_from_line(self, line):
        """
        Parses a datetime from the given line read from the log file.

        @param line: a single line from the log file

        @returns a datetime as a number of seconds from the epoch (float)

        """
        return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))


    def __init__(self):
        """
        Constructor.

        """
        # the last line read from the log file
        self._last_line_read = self._read_last_lines(1)[0]
        # logs obtained from the log file after preprocessing
        # key = a tuple (CUPS pid, job id)
        # value = a list with logs
        self._logs = dict()
        # data extracted from logs
        # key = a tuple (CUPS pid, job id)
        # value = a tuple (result(bool), envp(dict), argv(list), filters(list))
        self._results = dict()
        # mapping from printer's name to a list of CUPS jobs
        # key = printer name
        # value = a list of tuples (CUPS pid, job id)
        self._printer_name_to_key = dict()
        # a lock for synchronization
        self._critical_section = threading.Lock()
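
    # A note on the expected input format (the sample below is illustrative;
    # the host/severity field, pid and job id are made up). _parse_logs()
    # assumes that a CUPS entry in /var/log/messages looks roughly like:
    #
    #   2018-06-11T08:21:31.123456Z INFO cupsd[2137]: [Job 5] Started filter ...
    #
    # The line is split on the first five spaces; the third token must start
    # with 'cupsd[' and the fourth must be '[Job' for the line to be kept.
    # The first 20 characters must hold the datetime parsed by
    # _get_datetime_from_line().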

    def _parse_logs(self):
        """
        This method reads and parses new lines from the log file (all
        complete lines after the line saved in self._last_line_read). The
        lines are filtered and saved in self._logs. The dictionaries
        self._results and self._printer_name_to_key are updated accordingly.

        """
        # ==============================================
        # try to read lines from the end of the log file until the line from
        # self._last_line_read is found
        dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
        lines_count = 1024
        pos_last_line_read = None
        while True:
            lines = self._read_last_lines(lines_count)
            # search for self._last_line_read in lines read from the file
            for index, line in enumerate(lines):
                dt_line = self._get_datetime_from_line(line)
                if dt_line > dt_last_line_read + _TIME_MARGIN:
                    # the line's datetime is already past the search window,
                    # exit the loop
                    break
                if line == self._last_line_read:
                    # the line was found, save its index and exit the loop
                    pos_last_line_read = index
                    break
            # if self._last_line_read was found, exit the 'while' loop
            if pos_last_line_read is not None:
                break
            # check if we can try to read more lines from the file to repeat
            # the search; if not, then throw an exception
            if (len(lines) < lines_count or
                    self._get_datetime_from_line(lines[0]) +
                    _TIME_MARGIN < dt_last_line_read):
                raise Exception('Cannot find the last read line in the log'
                        ' file (maybe it has been erased?)')
            # increase the number of lines to read
            lines_count = lines_count * 2
        # check if there are any new lines, exit if not
        if pos_last_line_read + 1 == len(lines):
            return
        # update the field with the last line read from the log file
        self._last_line_read = lines[-1]
        # ==============================================
        # parse logs, select the ones from CUPS jobs and save them to
        # self._logs
        updated_jobs = set()
        for line in lines[pos_last_line_read + 1:]:
            tokens = line.split(' ', 5)
            if (len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
                    tokens[3] != '[Job'):
                continue
            pid = int(tokens[2][6:-2])
            job_id = int(tokens[4][:-1])
            message = tokens[5]
            key = (pid, job_id)
            if key not in self._logs:
                self._logs[key] = []
            self._logs[key].append(message)
            updated_jobs.add(key)
        # ==============================================
        # try to interpret logs
        for key in updated_jobs:
            number_of_processes_to_check = None
            printer_name = None
            result = True
            envp = dict()
            argv = []
            filters = []
            for msg in self._logs[key]:
                if number_of_processes_to_check is None:
                    regexp = re.match(r'(\d) filters for job:', msg)
                    if regexp is not None:
                        number_of_processes_to_check = int(regexp.group(1)) + 1
                        assert number_of_processes_to_check > 0
                elif printer_name is None:
                    regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
                    if regexp is not None:
                        fltr = regexp.group(1)
                        if fltr == '-':
                            number_of_processes_to_check -= 1
                        else:
                            filters.append(fltr)
                        trg = regexp.group(2)
                        if trg.startswith('printer/') and '/' not in trg[8:]:
                            printer_name = trg[8:]
                else:
                    regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
                    if regexp is not None:
                        envp[regexp.group(1)] = regexp.group(2)
                        continue
                    regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
                    if regexp is not None:
                        if len(argv) != int(regexp.group(1)):
                            raise Exception('Error when parsing argv for %s'
                                    % printer_name)
                        argv.append(regexp.group(2))
                        continue
                    if number_of_processes_to_check > 0:
                        regexp = re.match(r'PID \d+ \(\S+\) ', msg)
                        if regexp is not None:
                            number_of_processes_to_check -= 1
                            if (not msg.endswith(' exited with no errors.')
                                    and 'was terminated normally with signal'
                                    not in msg):
                                result = False
            if number_of_processes_to_check == 0:
                # if it is a new job, update self._printer_name_to_key
                if key not in self._results:
                    if printer_name not in self._printer_name_to_key:
                        self._printer_name_to_key[printer_name] = [key]
                    else:
                        self._printer_name_to_key[printer_name].append(key)
                # update results
                self._results[key] = (result, envp, argv, filters)
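
    # For illustration only (every value below is made up): the
    # interpretation stage in _parse_logs() expects, per job, a message
    # sequence of roughly this shape after the '[Job N] ' prefix has been
    # stripped:
    #
    #   4 filters for job:
    #   pstops (application/postscript to application/vnd.cups-postscript, cost 66)
    #   - (application/vnd.cups-postscript to printer/MyPrinter, cost 0)
    #   envp[7]="CUPS_SERVERROOT=/etc/cups"
    #   argv[0]="MyPrinter"
    #   argv[1]="5"
    #   PID 1234 (/usr/libexec/cups/filter/pstops) exited with no errors.
    #
    # The '-' entry decrements the number of processes to check, and the
    # 'printer/...' target yields the printer's name.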

    def _build_pipeline(
            self, printer_name, envp, argv, filters,
            path_ppd, path_doc, path_workdir):
        """
        This function builds a bash script with the pipeline used internally
        by CUPS for the given job. The resultant script is returned as a
        string.

        @param printer_name: a printer's name
        @param envp: a dictionary with environment variables for the pipeline
        @param argv: a list with command line parameters for filters
        @param filters: a list of filters
        @param path_ppd: a path to a PPD file (string)
        @param path_doc: a path to an input document (string)
        @param path_workdir: a path to a directory that should be used as the
                home directory and temporary location by filters

        @returns a pipeline as a bash script (string)

        """
        # update paths set in environment variables
        envp['PPD'] = path_ppd
        envp['HOME'] = path_workdir
        envp['TMPDIR'] = path_workdir
        # drop the last argument, it is an input file
        argv.pop()
        # replace the filter name by a full path for Star and Epson drivers
        if filters[-1] in _COMPONENT_FILTERS:
            find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
            find_cmd.append(filters[-1])
            filters[-1] = subprocess.check_output(find_cmd).rstrip()
        # build and return the script
        script = '#!/bin/bash\nset -e\nset -o pipefail\n'
        for name, value in envp.iteritems():
            script += ('export %s=%s\n' % (name, value))
        for ind, filt in enumerate(filters):
            if ind > 0:
                script += ('gzip -dc %d.doc.gz | ' % ind)
            script += ('(exec -a "%s" %s' % (argv[0], filt))
            for arg in argv[1:]:
                script += (' "%s"' % arg)
            if ind == 0:
                script += (' "%s"' % path_doc)
            script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind + 1, ind + 1))
        return script
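
    # A sketch of a script produced by _build_pipeline() for two filters;
    # the paths, filter names and arguments below are hypothetical:
    #
    #   #!/bin/bash
    #   set -e
    #   set -o pipefail
    #   export PPD=/tmp/workdir/printer.ppd
    #   export HOME=/tmp/workdir
    #   export TMPDIR=/tmp/workdir
    #   (exec -a "MyPrinter" pstops "5" "user" "doc" "1" "opts" "/tmp/doc.ps") 2>1.err | gzip -9 >1.doc.gz
    #   gzip -dc 1.doc.gz | (exec -a "MyPrinter" rastertopwg "5" "user" "doc" "1" "opts") 2>2.err | gzip -9 >2.doc.gz
    #
    # The first filter reads the input document directly; each later filter
    # reads the gzipped output of its predecessor.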

    def extract_result(
            self, printer_name,
            path_ppd=None, path_doc=None, path_workdir=None):
        """
        This function extracts from the log file CUPS logs related to the
        printer with the given name. Extracted logs are also interpreted; the
        function checks for any failures. If all optional parameters are set,
        a pipeline used by CUPS is extracted from the logs. If some of the
        required results cannot be extracted, an exception is raised. This
        function is thread-safe; it can be called simultaneously by many
        Python threads. However, all simultaneous calls must have different
        values of the printer_name parameter (calls with the same
        printer_name must be done sequentially).

        @param printer_name: a printer's name
        @param path_ppd: a path to a PPD file (string), required for pipeline
                extraction only
        @param path_doc: a path to an input document (string), required for
                pipeline extraction only
        @param path_workdir: a path to a directory that should be used as the
                home directory and temporary location by filters, required
                for pipeline extraction only

        @returns a tuple of three variables:
                1. True if no errors were detected, False otherwise
                2. logs extracted from the log file (string)
                3. pipeline as a bash script (string), None if not requested

        @raises Exception in case of any errors

        """
        # time when we started efforts to get logs
        time_start = time.time()
        # all three parameters must be set to extract a pipeline
        pipeline_required = (path_ppd is not None and path_doc is not None
                and path_workdir is not None)
        try:
            # try to get and parse logs
            while True:
                with self._critical_section:
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                    # save current time and try to parse the log file
                    time_last_read = time.time()
                    self._parse_logs()
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                # raise an exception if we have been waiting too long
                if time_last_read - time_start > _TIME_MAX_WAIT:
                    raise Exception('Cannot parse logs for %s or they did not'
                            ' appear in %s within %d seconds from printing the'
                            ' document' % (printer_name, _PATH_LOG_FILE,
                            _TIME_MAX_WAIT))
                # wait 1 second and try again to parse acquired logs
                time.sleep(1)
            # retrieve required data
            with self._critical_section:
                if pipeline_required:
                    # key, envp, argv are already set in the loop above
                    filters = self._results[key][3]
                else:
                    key = self._printer_name_to_key[printer_name][0]
                no_errors = self._results[key][0]
                logs = '\n'.join(self._logs[key])
            # build a pipeline script if required
            if pipeline_required:
                pipeline = self._build_pipeline(printer_name, envp, argv,
                        filters, path_ppd, path_doc, path_workdir)
            else:
                pipeline = None
            # return results
            return no_errors, logs, pipeline
        finally:
            # cleanup
            if printer_name in self._printer_name_to_key:
                with self._critical_section:
                    key = self._printer_name_to_key[printer_name].pop(0)
                    if len(self._printer_name_to_key[printer_name]) == 0:
                        self._printer_name_to_key.pop(printer_name)
                    self._logs.pop(key)
                    self._results.pop(key)
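
# A minimal usage sketch (not part of the original module). It assumes the
# LogReader was created before the document was sent to CUPS (the reader only
# parses lines appended after its construction) and that the paths below
# exist; every name here is hypothetical:
#
#   reader = LogReader()
#   # ... print a document on the printer 'MyPrinter' ...
#   no_errors, logs, pipeline = reader.extract_result(
#           'MyPrinter', path_ppd='/tmp/workdir/printer.ppd',
#           path_doc='/tmp/doc.pdf', path_workdir='/tmp/workdir')
#   if not no_errors:
#       print 'CUPS reported errors:\n' + logs
#   if pipeline is not None:
#       with open('pipeline.sh', 'w') as f:
#           f.write(pipeline)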