# Copyright 2018 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. import re import subprocess import threading import time # A path of the log file _PATH_LOG_FILE = '/var/log/messages' # Time margin as a number of seconds. It is used during parsing of the log file. # Sometimes, logs are not in strict datetime order. This value defines maximum # possible datetime error in logs sequence. _TIME_MARGIN = 2 # Max time in seconds to wait for CUPS logs to appear in the log file. _TIME_MAX_WAIT = 100 # A list of filters that are installed by loading components. # Currently, they are Star and Epson drivers. _COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper'] class LogReader(): """ An instance of this class is responsible for gathering data about CUPS jobs and extracting their results and pipelines by parsing the main log file. It can check for internal errors during a job execution and build a bash script simulating an executed pipeline. CUPS server must have logging level set to 'debug' to make a pipeline extraction possible. """ def _read_last_lines(self, lines_count): """ The function reads given number of lines from the end of the log file. The last line is omitted if it is not ended with EOL character. If a whole file was read, count of returned lines may be smaller than requested number. Always at least one line is returned; if the file has no EOL characters an exception is raised. @param lines_count: a number of lines to read @returns a list of lines """ assert lines_count > 0 argv = ['tail', '-n', '%d' % (lines_count+1), _PATH_LOG_FILE] p1 = subprocess.Popen(argv, stdout=subprocess.PIPE) out,err = p1.communicate() lines = out.split('\n') lines.pop() if len(lines) > lines_count: if len(lines) == 0: raise Exception('The file %s is empty' % _PATH_LOG_FILE) lines = lines[1:] return lines def _get_datetime_from_line(self, line): """ Parse datetime from the given line read from the log file. @param line_count: single line from the log files @returns a datetime as a number of seconds from the epoch (float) """ return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S.")) def __init__(self): """ Constructor. """ # the last line read from the log file self._last_line_read = self._read_last_lines(1)[0] # logs obtained from the log file after preprocessing # key = a tuple (CUPS pid, job id) # value = list with logs self._logs = dict() # data extracted from logs # key = a tuple (CUPS pid, job id) # value = a tuple (result(bool),envp(dict),argv(list),filters(list)) self._results = dict() # mapping from printer's name to a list of CUPS jobs # key = printer name # value = a list of tuples (CUPS pid, job id) self._printer_name_to_key = dict() # a lock for synchronization self._critical_section = threading.Lock() def _parse_logs(self): """ This method read and parses new lines from the log file (all complete lines after the line saved in self._last_line_read). The lines are filtered and saved in self._logs. Dictionaries self._results and self._printer_name_to_key are updated accordingly. """ # ============================================== # try to read lines from the end of the log file until the line from # self._last_line_read is found dt_last_line_read = self._get_datetime_from_line(self._last_line_read) lines_count = 1024 pos_last_line_read = None while True: lines = self._read_last_lines(lines_count) # search for self._last_line_read in lines read from the file for index,line in enumerate(lines): dt_line = self._get_datetime_from_line(line) if dt_line > dt_last_line_read + _TIME_MARGIN: # a datetime of the line is far behind, exit the loop break if line == self._last_line_read: # the line was found, save its index and exit the loop pos_last_line_read = index break # if the self._last_line_read was found, exit the 'while' loop if pos_last_line_read is not None: break # check if we can try to read more lines from the file to repeat # the search, if not then throw an exception if ( len(lines) < lines_count or self._get_datetime_from_line(lines[0]) + _TIME_MARGIN < dt_last_line_read ): raise Exception('Cannot find the last read line in the log' ' file (maybe it has been erased?)') # increase the number of lines to read lines_count = lines_count * 2 # check if there are any new lines, exit if not if pos_last_line_read + 1 == len(lines): return # update the field with the last line read from the log file self._last_line_read = lines[-1] # ============================================== # parse logs, select these ones from CUPS jobs and save them to # self._logs updated_jobs = set() for line in lines[pos_last_line_read+1:]: tokens = line.split(' ', 5) if ( len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or tokens[3] != '[Job' ): continue pid = int(tokens[2][6:-2]) job_id = int(tokens[4][:-1]) message = tokens[5] key = (pid,job_id) if key not in self._logs: self._logs[key] = [] self._logs[key].append(message) updated_jobs.add(key) # ============================================== # try to interpret logs for key in updated_jobs: number_of_processes_to_check = None printer_name = None result = True envp = dict() argv = [] filters = [] for msg in self._logs[key]: if number_of_processes_to_check is None: regexp = re.match(r'(\d) filters for job:', msg) if regexp is not None: number_of_processes_to_check = int(regexp.group(1)) + 1 assert number_of_processes_to_check > 0 elif printer_name is None: regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg) if regexp is not None: fltr = regexp.group(1) if fltr == '-': number_of_processes_to_check -= 1 else: filters.append(fltr) trg = regexp.group(2) if trg.startswith('printer/') and '/' not in trg[8:]: printer_name = trg[8:] else: regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg) if regexp is not None: envp[regexp.group(1)] = regexp.group(2) continue regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg) if regexp is not None: if len(argv) != int(regexp.group(1)): raise Exception('Error when parsing argv for %s' % printer_name) argv.append(regexp.group(2)) continue if number_of_processes_to_check > 0: regexp = re.match(r'PID \d+ \(\S+\) ', msg) if regexp is not None: number_of_processes_to_check -= 1 if ( not msg.endswith(' exited with no errors.') and 'was terminated normally with signal' not in msg ): result = False if number_of_processes_to_check == 0: # if it is a new job, update self._printer_name_to_key if key not in self._results: if printer_name not in self._printer_name_to_key: self._printer_name_to_key[printer_name] = [key] else: self._printer_name_to_key[printer_name].append(key) # update results self._results[key] = (result,envp,argv,filters) def _build_pipeline( self, printer_name, envp, argv, filters, path_ppd, path_doc, path_workdir): """ This function tries to build a bash script containing a pipeline used internally by CUPS. All printer's parameters are passed by the field self._printers_data. Resultant script is saved in self._pipelines. @param printer_name: a printer's name @param envp: a dictionary with environment variables for the pipeline @param argv: a list with command line parameters for filters @param filters: a list of filters @param path_ppd: a path to PPD file (string) @param path_doc: a path to input document (string) @param path_workdir: a path to a directory, that should be used as home directory and temporary location by filters @returns a pipeline as a bash script (string) """ # update paths set in environment variables envp['PPD'] = path_ppd envp['HOME'] = path_workdir envp['TMPDIR'] = path_workdir # drop the last argument, it is an input file argv.pop() # replace filter names by full path for Star and Epson drivers if filters[-1] in _COMPONENT_FILTERS: find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name'] find_cmd.append(filters[-1]) filters[-1] = subprocess.check_output(find_cmd).rstrip() # build and return the script script = '#!/bin/bash\nset -e\nset -o pipefail\n' for name, value in envp.iteritems(): script += ('export %s=%s\n' % (name, value)) for ind, filt in enumerate(filters): if ind > 0: script += ('gzip -dc %d.doc.gz | ' % ind) script += ('(exec -a "%s" %s' % (argv[0], filt)) for arg in argv[1:]: script += (' "%s"' % arg) if ind == 0: script += (' "%s"' % path_doc) script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind+1, ind+1)) return script def extract_result( self, printer_name, path_ppd=None, path_doc=None, path_workdir=None): """ This function extract from the log file CUPS logs related to printer with given name. Extracted logs are also interpreted, the function checks for any failures. If all optional parameters are set, a pipeline used by CUPS is extracted from the logs. If some of required results cannot be extracted, an exception is raised. This function is thread-safe, it can be called simultaneously by many Python threads. However, all simultaneous calls must have different values of the printer_name parameter (calls with the same printer_name must be done sequentially). @param printer_name: a printer's name @param path_ppd: a path to PPD file (string), required for pipelinie extraction only @param path_doc: a path to input document (string), required for pipelinie extraction only @param path_workdir: a path to a directory, that should be used as home directory and temporary location by filters, required for pipelinie extraction only @returns a tuple of three variables: 1. True if no errors were detected, False otherwise 2. logs extracted from the log file (string) 3. pipeline as a bash script (string), None if not requested @raises Exception in case of any errors """ # time when we started efforts to get logs time_start = time.time() # all three parameters must be set to extract a pipeline pipeline_required = (path_ppd is not None and path_doc is not None and path_workdir is not None) try: # try to get and parse logs while True: with self._critical_section: # leave the loop if we have everything if printer_name in self._printer_name_to_key: if not pipeline_required: break key = self._printer_name_to_key[printer_name][0] envp = self._results[key][1] argv = self._results[key][2] if len(envp) > 0 and len(argv) > 0: break # save current time and try to parse the log file time_last_read = time.time() self._parse_logs() # leave the loop if we have everything if printer_name in self._printer_name_to_key: if not pipeline_required: break key = self._printer_name_to_key[printer_name][0] envp = self._results[key][1] argv = self._results[key][2] if len(envp) > 0 and len(argv) > 0: break # raise an exception if we have been waiting too long if time_last_read - time_start > _TIME_MAX_WAIT: raise Exception('Cannot parse logs for %s or they did not' ' appear in %s within %d seconds from printing the' ' document' % (printer_name, _PATH_LOG_FILE, _TIME_MAX_WAIT)) # wait 1 second and try again to parse acquired logs time.sleep(1) # retrieve required data with self._critical_section: if pipeline_required: # key, envp, argv are already set in the loop above filters = self._results[key][3] else: key = self._printer_name_to_key[printer_name][0] no_errors = self._results[key][0] logs = '\n'.join(self._logs[key]) # build a pipeline script if required if pipeline_required: pipeline = self._build_pipeline(printer_name, envp, argv, filters, path_ppd, path_doc, path_workdir) else: pipeline = None # return results return no_errors, logs, pipeline finally: # cleanup if printer_name in self._printer_name_to_key: with self._critical_section: key = self._printer_name_to_key[printer_name].pop(0) if len(self._printer_name_to_key[printer_name]) == 0: self._printer_name_to_key.pop(printer_name) self._logs.pop(key) self._results.pop(key)