# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import subprocess
import threading
import time

# A path of the log file
_PATH_LOG_FILE = '/var/log/messages'
# Time margin as a number of seconds. It is used during parsing of the log file.
# Sometimes, logs are not in strict datetime order. This value defines maximum
# possible datetime error in logs sequence.
_TIME_MARGIN = 2
# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100
# A list of filters that are installed by loading components.
# Currently, they are Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']


class LogReader():
    """
    An instance of this class is responsible for gathering data about CUPS jobs
    and extracting their results and pipelines by parsing the main log file. It
    can check for internal errors during a job execution and build a bash script
    simulating an executed pipeline. CUPS server must have logging level set to
    'debug' to make a pipeline extraction possible.

    """

    def _read_last_lines(self, lines_count):
        """
        The function reads given number of lines from the end of the log file.
        The last line is omitted if it is not ended with EOL character. If a
        whole file was read, count of returned lines may be smaller than
        requested number. Always at least one line is returned; if the file has
        no EOL characters an exception is raised.

        @param lines_count: a number of lines to read

        @returns a list of lines

        @raises Exception if the log file has no complete (EOL-terminated)
                lines

        """
        assert lines_count > 0
        # Ask for one extra line so that, after dropping a possible leading
        # partial line, up to lines_count complete lines remain.
        argv = ['tail', '-n', '%d' % (lines_count+1), _PATH_LOG_FILE]
        p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
        out,err = p1.communicate()
        lines = out.split('\n')
        # Drop the last element of the split: it is either an empty string
        # (output ended with EOL) or an incomplete line without EOL.
        lines.pop()
        # BUGFIX: this emptiness check used to live inside the
        # 'len(lines) > lines_count' branch, where it could never trigger;
        # an empty log file silently returned [] instead of raising the
        # documented exception. Check it unconditionally.
        if len(lines) == 0:
            raise Exception('The file %s is empty' % _PATH_LOG_FILE)
        if len(lines) > lines_count:
            # The extra first line read by 'tail' is not needed; drop it.
            lines = lines[1:]
        return lines


    def _get_datetime_from_line(self, line):
        """
        Parse datetime from the given line read from the log file.

        @param line: a single line from the log file; its first 20 characters
                must have the form '2018-05-01T12:34:56.'

        @returns a datetime as a number of seconds from the epoch (float)

        """
        return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))


    def __init__(self):
        """
        Constructor.

        @raises Exception if the log file has no complete lines (see
                _read_last_lines)

        """
        # the last line read from the log file
        self._last_line_read = self._read_last_lines(1)[0]
        # logs obtained from the log file after preprocessing
        # key = a tuple (CUPS pid, job id)
        # value = list with logs
        self._logs = dict()
        # data extracted from logs
        # key = a tuple (CUPS pid, job id)
        # value = a tuple (result(bool),envp(dict),argv(list),filters(list))
        self._results = dict()
        # mapping from printer's name to a list of CUPS jobs
        # key = printer name
        # value = a list of tuples (CUPS pid, job id)
        self._printer_name_to_key = dict()
        # a lock for synchronization
        self._critical_section = threading.Lock()


    def _parse_logs(self):
        """
        This method read and parses new lines from the log file (all complete
        lines after the line saved in self._last_line_read). The lines are
        filtered and saved in self._logs. Dictionaries self._results and
        self._printer_name_to_key are updated accordingly.

        @raises Exception if self._last_line_read cannot be located in the
                log file anymore (e.g. the file was rotated or erased)

        """
        # ==============================================
        # try to read lines from the end of the log file until the line from
        # self._last_line_read is found
        dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
        lines_count = 1024
        pos_last_line_read = None
        while True:
            lines = self._read_last_lines(lines_count)
            # search for self._last_line_read in lines read from the file
            for index,line in enumerate(lines):
                dt_line = self._get_datetime_from_line(line)
                if dt_line > dt_last_line_read + _TIME_MARGIN:
                    # the line is already newer than the searched one by more
                    # than the allowed margin; the searched line cannot occur
                    # later, exit the loop
                    break
                if line == self._last_line_read:
                    # the line was found, save its index and exit the loop
                    pos_last_line_read = index
                    break
            # if the self._last_line_read was found, exit the 'while' loop
            if pos_last_line_read is not None:
                break
            # check if we can try to read more lines from the file to repeat
            # the search, if not then throw an exception; reading more lines
            # cannot help when the whole file was already read or when the
            # oldest line read is already older than the searched one
            if ( len(lines) < lines_count or
                    self._get_datetime_from_line(lines[0]) +
                    _TIME_MARGIN < dt_last_line_read ):
                raise Exception('Cannot find the last read line in the log'
                        ' file (maybe it has been erased?)')
            # increase the number of lines to read
            lines_count = lines_count * 2
        # check if there are any new lines, exit if not
        if pos_last_line_read + 1 == len(lines):
            return
        # update the field with the last line read from the log file
        self._last_line_read = lines[-1]
        # ==============================================
        # parse logs, select these ones from CUPS jobs and save them to
        # self._logs; a matching line looks like:
        #   <datetime> <host> cupsd[<pid>]: [Job <id>] <message>
        updated_jobs = set()
        for line in lines[pos_last_line_read+1:]:
            tokens = line.split(' ', 5)
            if ( len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
                    tokens[3] != '[Job' ):
                continue
            pid = int(tokens[2][6:-2])
            job_id = int(tokens[4][:-1])
            message = tokens[5]
            key = (pid,job_id)
            if key not in self._logs:
                self._logs[key] = []
            self._logs[key].append(message)
            updated_jobs.add(key)
        # ==============================================
        # try to interpret logs
        for key in updated_jobs:
            number_of_processes_to_check = None
            printer_name = None
            result = True
            envp = dict()
            argv = []
            filters = []
            for msg in self._logs[key]:
                if number_of_processes_to_check is None:
                    # first, find the number of filters used by the job
                    regexp = re.match(r'(\d) filters for job:', msg)
                    if regexp is not None:
                        # +1 presumably accounts for the backend process in
                        # addition to the filters - TODO confirm
                        number_of_processes_to_check = int(regexp.group(1)) + 1
                        assert number_of_processes_to_check > 0
                elif printer_name is None:
                    # then, collect filter names until the line with the
                    # target 'printer/<name>' reveals the printer's name
                    regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
                    if regexp is not None:
                        fltr = regexp.group(1)
                        if fltr == '-':
                            # '-' means no filter process is spawned here
                            number_of_processes_to_check -= 1
                        else:
                            filters.append(fltr)
                        trg = regexp.group(2)
                        if trg.startswith('printer/') and '/' not in trg[8:]:
                            printer_name = trg[8:]
                else:
                    # finally, collect envp/argv entries and exit statuses
                    regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
                    if regexp is not None:
                        envp[regexp.group(1)] = regexp.group(2)
                        continue
                    regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
                    if regexp is not None:
                        # argv entries must appear in order, without gaps
                        if len(argv) != int(regexp.group(1)):
                            raise Exception('Error when parsing argv for %s'
                                    % printer_name)
                        argv.append(regexp.group(2))
                        continue
                    if number_of_processes_to_check > 0:
                        regexp = re.match(r'PID \d+ \(\S+\) ', msg)
                        if regexp is not None:
                            number_of_processes_to_check -= 1
                            # any exit other than 'no errors' or a normal
                            # termination by signal is treated as a failure
                            if ( not msg.endswith(' exited with no errors.') and
                                    'was terminated normally with signal'
                                    not in msg ):
                                result = False
            # save the results only when all processes have been accounted for
            if number_of_processes_to_check == 0:
                # if it is a new job, update self._printer_name_to_key
                if key not in self._results:
                    if printer_name not in self._printer_name_to_key:
                        self._printer_name_to_key[printer_name] = [key]
                    else:
                        self._printer_name_to_key[printer_name].append(key)
                # update results
                self._results[key] = (result,envp,argv,filters)


    def _build_pipeline(
            self, printer_name, envp, argv, filters,
            path_ppd, path_doc, path_workdir):
        """
        This function builds a bash script containing a pipeline used
        internally by CUPS for the given job, based on the environment,
        arguments and filters extracted from the logs.

        Note: envp, argv and filters are modified in place; callers must not
        rely on their contents afterwards.

        @param printer_name: a printer's name
        @param envp: a dictionary with environment variables for the pipeline
        @param argv: a list with command line parameters for filters
        @param filters: a list of filters
        @param path_ppd: a path to PPD file (string)
        @param path_doc: a path to input document (string)
        @param path_workdir: a path to a directory, that should be used as home
                directory and temporary location by filters

        @returns a pipeline as a bash script (string)

        """
        # update paths set in environment variables
        envp['PPD'] = path_ppd
        envp['HOME'] = path_workdir
        envp['TMPDIR'] = path_workdir
        # drop the last argument, it is an input file
        argv.pop()
        # replace filter names by full path for Star and Epson drivers
        if filters[-1] in _COMPONENT_FILTERS:
            find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
            find_cmd.append(filters[-1])
            filters[-1] = subprocess.check_output(find_cmd).rstrip()
        # build and return the script; each filter reads the previous
        # filter's gzipped output, and writes its own output and stderr to
        # '<index>.doc.gz' and '<index>.err' respectively
        script = '#!/bin/bash\nset -e\nset -o pipefail\n'
        # .items() instead of .iteritems() - same behavior, also valid on
        # Python 3
        for name, value in envp.items():
            script += ('export %s=%s\n' % (name, value))
        for ind, filt in enumerate(filters):
            if ind > 0:
                script += ('gzip -dc %d.doc.gz | ' % ind)
            script += ('(exec -a "%s" %s' % (argv[0], filt))
            for arg in argv[1:]:
                script += (' "%s"' % arg)
            if ind == 0:
                # only the first filter reads the input document directly
                script += (' "%s"' % path_doc)
            script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind+1, ind+1))
        return script


    def extract_result(
            self, printer_name,
            path_ppd=None, path_doc=None, path_workdir=None):
        """
        This function extract from the log file CUPS logs related to printer
        with given name. Extracted logs are also interpreted, the function
        checks for any failures. If all optional parameters are set, a pipeline
        used by CUPS is extracted from the logs. If some of required results
        cannot be extracted, an exception is raised. This function is
        thread-safe, it can be called simultaneously by many Python threads.
        However, all simultaneous calls must have different values of the
        printer_name parameter (calls with the same printer_name must be done
        sequentially).

        @param printer_name: a printer's name
        @param path_ppd: a path to PPD file (string), required for pipeline
                extraction only
        @param path_doc: a path to input document (string), required for
                pipeline extraction only
        @param path_workdir: a path to a directory, that should be used as home
                directory and temporary location by filters, required for
                pipeline extraction only

        @returns a tuple of three variables:
                1. True if no errors were detected, False otherwise
                2. logs extracted from the log file (string)
                3. pipeline as a bash script (string), None if not requested

        @raises Exception in case of any errors

        """
        # time when we started efforts to get logs
        time_start = time.time()
        # all three parameters must be set to extract a pipeline
        pipeline_required = (path_ppd is not None and path_doc is not None
                and path_workdir is not None)
        try:
            # try to get and parse logs
            while True:
                with self._critical_section:
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                    # save current time and try to parse the log file
                    time_last_read = time.time()
                    self._parse_logs()
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                # raise an exception if we have been waiting too long
                if time_last_read - time_start > _TIME_MAX_WAIT:
                    raise Exception('Cannot parse logs for %s or they did not'
                            ' appear in %s within %d seconds from printing the'
                            ' document' % (printer_name, _PATH_LOG_FILE,
                            _TIME_MAX_WAIT))
                # wait 1 second and try again to parse acquired logs
                time.sleep(1)
            # retrieve required data
            with self._critical_section:
                if pipeline_required:
                    # key, envp, argv are already set in the loop above
                    filters = self._results[key][3]
                else:
                    key = self._printer_name_to_key[printer_name][0]
                no_errors = self._results[key][0]
                logs = '\n'.join(self._logs[key])
            # build a pipeline script if required
            if pipeline_required:
                pipeline = self._build_pipeline(printer_name, envp, argv,
                        filters, path_ppd, path_doc, path_workdir)
            else:
                pipeline = None
            # return results
            return no_errors, logs, pipeline
        finally:
            # cleanup: drop the consumed job's logs and results; safe without
            # holding the lock for the membership test because concurrent
            # calls are required to use different printer names
            if printer_name in self._printer_name_to_key:
                with self._critical_section:
                    key = self._printer_name_to_key[printer_name].pop(0)
                    if len(self._printer_name_to_key[printer_name]) == 0:
                        self._printer_name_to_key.pop(printer_name)
                    self._logs.pop(key)
                    self._results.pop(key)
353