# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import subprocess
import threading
import time

# A path to the log file
_PATH_LOG_FILE = '/var/log/messages'
# Time margin as a number of seconds. It is used during parsing of the log
# file. Sometimes, logs are not in strict datetime order. This value defines
# the maximum possible datetime error in the logs sequence.
_TIME_MARGIN = 2
# Max time in seconds to wait for CUPS logs to appear in the log file.
_TIME_MAX_WAIT = 100
# A list of filters that are installed by loading components.
# Currently, they are Star and Epson drivers.
_COMPONENT_FILTERS = ['rastertostar', 'rastertostarlm', 'epson-escpr-wrapper']


class LogReader():
    """
    An instance of this class is responsible for gathering data about CUPS jobs
    and extracting their results and pipelines by parsing the main log file. It
    can check for internal errors during a job execution and build a bash
    script simulating an executed pipeline. The CUPS server must have its
    logging level set to 'debug' to make pipeline extraction possible.

    """

    def _read_last_lines(self, lines_count):
        """
        Reads the given number of lines from the end of the log file. The last
        line is omitted if it does not end with an EOL character. If the whole
        file was read, the count of returned lines may be smaller than the
        requested number. At least one line is always returned; if the file
        has no EOL characters, an exception is raised.

        @param lines_count: a number of lines to read

        @returns a list of lines (as strings)

        """
        assert lines_count > 0
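        # Request one extra line from 'tail' to compensate for the last line,
        # which is dropped below when it is not terminated with an EOL
        # character.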
        argv = ['tail', '-n', '%d' % (lines_count+1), _PATH_LOG_FILE]
        p1 = subprocess.Popen(argv, stdout=subprocess.PIPE)
        out, _ = p1.communicate()

        # It is possible for invalid UTF-8 to appear in the system log
        # (e.g. null bytes on unclean poweroff), but this doesn't
        # concern us, so we elect to ignore it.
        lines = out.decode(errors="ignore").split('\n')
        # drop the last element: it is either an empty string (when the output
        # ends with EOL) or a line without a terminating EOL character
        lines.pop()
        if len(lines) == 0:
            raise Exception('The file %s is empty' % _PATH_LOG_FILE)
        if len(lines) > lines_count:
            lines = lines[1:]
        return lines


    def _get_datetime_from_line(self, line):
        """
        Parse datetime from the given line read from the log file.

        @param line: a single line from the log file

        @returns a datetime as a number of seconds from the epoch (float)

        """
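        # The first 20 characters of each line are expected to form a
        # timestamp like '2018-01-02T03:04:05.' (this matches the strptime
        # format below).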
        return time.mktime(time.strptime(line[:20], "%Y-%m-%dT%H:%M:%S."))


    def __init__(self):
        """
        Constructor.

        """
        # the last line read from the log file
        self._last_line_read = self._read_last_lines(1)[0]
        # logs obtained from the log file after preprocessing
        # key = a tuple (CUPS pid, job id)
        # value = list with logs
        self._logs = dict()
        # data extracted from logs
        # key = a tuple (CUPS pid, job id)
        # value = a tuple (result(bool), envp(dict), argv(list), filters(list))
        self._results = dict()
        # mapping from printer's name to a list of CUPS jobs
        # key = printer name
        # value = a list of tuples (CUPS pid, job id)
        self._printer_name_to_key = dict()
        # a lock for synchronization
        self._critical_section = threading.Lock()


    def _parse_logs(self):
        """
        This method reads and parses new lines from the log file (all complete
        lines after the line saved in self._last_line_read). The lines are
        filtered and saved in self._logs. Dictionaries self._results and
        self._printer_name_to_key are updated accordingly.

        """
        # ==============================================
        # try to read lines from the end of the log file until the line from
        # self._last_line_read is found
        dt_last_line_read = self._get_datetime_from_line(self._last_line_read)
        lines_count = 1024
        pos_last_line_read = None
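        # The file is read from its end in exponentially growing chunks (the
        # requested line count doubles on every iteration) until the last read
        # line is found or it is known that the line cannot be present.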
        while True:
            lines = self._read_last_lines(lines_count)
            # search for self._last_line_read in lines read from the file
            for index, line in enumerate(lines):
                dt_line = self._get_datetime_from_line(line)
                if dt_line > dt_last_line_read + _TIME_MARGIN:
                    # the line is already newer than the last read line plus
                    # the margin, so the last read line cannot occur further;
                    # exit the loop
                    break
                if line == self._last_line_read:
                    # the line was found, save its index and exit the loop
                    pos_last_line_read = index
                    break
            # if the self._last_line_read was found, exit the 'while' loop
            if pos_last_line_read is not None:
                break
            # check if we can try to read more lines from the file to repeat
            # the search; if not, throw an exception
            if ( len(lines) < lines_count or
                    self._get_datetime_from_line(lines[0]) +
                    _TIME_MARGIN < dt_last_line_read ):
                raise Exception('Cannot find the last read line in the log'
                        ' file (maybe it has been erased?)')
            # increase the number of lines to read
            lines_count = lines_count * 2
        # check if there are any new lines, exit if not
        if pos_last_line_read + 1 == len(lines):
            return
        # update the field with the last line read from the log file
        self._last_line_read = lines[-1]
        # ==============================================
        # parse logs, select the ones coming from CUPS jobs and save them to
        # self._logs
        updated_jobs = set()
        for line in lines[pos_last_line_read+1:]:
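            # Each relevant line is expected to have the following form (an
            # assumption reflected in the parsing below):
            #   <timestamp> <host> cupsd[<pid>]: [Job <id>] <message>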
            tokens = line.split(' ', 5)
            if ( len(tokens) < 6 or tokens[2][:6] != 'cupsd[' or
                    tokens[3] != '[Job' ):
                continue
            pid = int(tokens[2][6:-2])
            job_id = int(tokens[4][:-1])
            message = tokens[5]
            key = (pid, job_id)
            if key not in self._logs:
                self._logs[key] = []
            self._logs[key].append(message)
            updated_jobs.add(key)
        # ==============================================
        # try to interpret logs
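        # For every updated job, its messages are scanned as a simple state
        # machine: first the number of filters is determined, then the
        # printer's name is extracted from the filters chain, and finally
        # envp, argv and the exit statuses of spawned processes are collected.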
        for key in updated_jobs:
            number_of_processes_to_check = None
            printer_name = None
            result = True
            envp = dict()
            argv = []
            filters = []
            for msg in self._logs[key]:
                if number_of_processes_to_check is None:
                    regexp = re.match(r'(\d) filters for job:', msg)
                    if regexp is not None:
                        # +1, because CUPS spawns one process more than the
                        # number of filters (presumably the backend)
                        number_of_processes_to_check = int(regexp.group(1)) + 1
                        assert number_of_processes_to_check > 0
                elif printer_name is None:
                    regexp = re.match(r'(\S+) \(\S+ to (\S+), cost \d+\)', msg)
                    if regexp is not None:
                        fltr = regexp.group(1)
                        if fltr == '-':
                            number_of_processes_to_check -= 1
                        else:
                            filters.append(fltr)
                        trg = regexp.group(2)
                        if trg.startswith('printer/') and '/' not in trg[8:]:
                            printer_name = trg[8:]
                else:
                    regexp = re.match(r'envp\[\d+\]="(\S+)=(\S+)"', msg)
                    if regexp is not None:
                        envp[regexp.group(1)] = regexp.group(2)
                        continue
                    regexp = re.match(r'argv\[(\d+)\]="(.+)"', msg)
                    if regexp is not None:
                        if len(argv) != int(regexp.group(1)):
                            raise Exception('Error when parsing argv for %s'
                                    % printer_name)
                        argv.append(regexp.group(2))
                        continue
                    if number_of_processes_to_check > 0:
                        regexp = re.match(r'PID \d+ \(\S+\) ', msg)
                        if regexp is not None:
                            number_of_processes_to_check -= 1
                            if ( not msg.endswith(' exited with no errors.') and
                                    'was terminated normally with signal'
                                    not in msg ):
                                result = False
            if number_of_processes_to_check == 0:
                # if it is a new job, update self._printer_name_to_key
                if key not in self._results:
                    if printer_name not in self._printer_name_to_key:
                        self._printer_name_to_key[printer_name] = [key]
                    else:
                        self._printer_name_to_key[printer_name].append(key)
                # update results
                self._results[key] = (result, envp, argv, filters)


    def _build_pipeline(
            self, printer_name, envp, argv, filters,
            path_ppd, path_doc, path_workdir):
        """
        This function tries to build a bash script containing a pipeline used
        internally by CUPS. All required data are passed as parameters; the
        resultant script is returned as a string.

        @param printer_name: a printer's name
        @param envp: a dictionary with environment variables for the pipeline
        @param argv: a list with command line parameters for filters
        @param filters: a list of filters
        @param path_ppd: a path to the PPD file (string)
        @param path_doc: a path to the input document (string)
        @param path_workdir: a path to a directory that should be used as the
                home directory and temporary location by filters

        @returns a pipeline as a bash script (string)

        """
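        # For two filters, the resultant script looks roughly like this (a
        # sketch; real filter names and arguments are taken from the logs):
        #   #!/bin/bash
        #   set -e
        #   set -o pipefail
        #   export PPD=/path/to/ppd
        #   (exec -a "argv0" filter1 "argv1" "/path/to/doc") 2>1.err | gzip -9 >1.doc.gz
        #   gzip -dc 1.doc.gz | (exec -a "argv0" filter2 "argv1") 2>2.err | gzip -9 >2.doc.gz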
        # update paths set in environment variables
        envp['PPD'] = path_ppd
        envp['HOME'] = path_workdir
        envp['TMPDIR'] = path_workdir
        # drop the last argument, it is an input file
        argv.pop()
        # replace the filter name by a full path for Star and Epson drivers
        if filters[-1] in _COMPONENT_FILTERS:
            find_cmd = ['find', '/run/imageloader/', '-type', 'f', '-name']
            find_cmd.append(filters[-1])
            # check_output() returns bytes; decode them to get a path (string)
            filters[-1] = subprocess.check_output(find_cmd).decode().rstrip()
        # build and return the script
        script = '#!/bin/bash\nset -e\nset -o pipefail\n'
        for name, value in envp.items():
            script += ('export %s=%s\n' % (name, value))
        for ind, filt in enumerate(filters):
            if ind > 0:
                script += ('gzip -dc %d.doc.gz | ' % ind)
            script += ('(exec -a "%s" %s' % (argv[0], filt))
            for arg in argv[1:]:
                script += (' "%s"' % arg)
            if ind == 0:
                script += (' "%s"' % path_doc)
            script += (') 2>%d.err | gzip -9 >%d.doc.gz\n' % (ind+1, ind+1))
        return script


    def extract_result(
            self, printer_name,
            path_ppd=None, path_doc=None, path_workdir=None):
        """
        This function extracts from the log file CUPS logs related to the
        printer with the given name. Extracted logs are also interpreted; the
        function checks for any failures. If all optional parameters are set,
        a pipeline used by CUPS is extracted from the logs. If some of the
        required results cannot be extracted, an exception is raised. This
        function is thread-safe, it can be called simultaneously by many
        Python threads. However, all simultaneous calls must have different
        values of the printer_name parameter (calls with the same printer_name
        must be done sequentially).

        @param printer_name: a printer's name
        @param path_ppd: a path to the PPD file (string), required for
                pipeline extraction only
        @param path_doc: a path to the input document (string), required for
                pipeline extraction only
        @param path_workdir: a path to a directory that should be used as the
                home directory and temporary location by filters, required for
                pipeline extraction only

        @returns a tuple of three variables:
                1. True if no errors were detected, False otherwise
                2. logs extracted from the log file (string)
                3. pipeline as a bash script (string), None if not requested

        @raises Exception in case of any errors

        """
        # time when we started efforts to get logs
        time_start = time.time()
        # all three parameters must be set to extract a pipeline
        pipeline_required = (path_ppd is not None and path_doc is not None
                and path_workdir is not None)
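        # envp and argv stored in self._results may still be empty when the
        # corresponding 'debug' logs have not been flushed to the log file
        # yet; the loop below retries until they are present or the time
        # limit is exceeded.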
        try:
            # try to get and parse logs
            while True:
                with self._critical_section:
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                    # save current time and try to parse the log file
                    time_last_read = time.time()
                    self._parse_logs()
                    # leave the loop if we have everything
                    if printer_name in self._printer_name_to_key:
                        if not pipeline_required:
                            break
                        key = self._printer_name_to_key[printer_name][0]
                        envp = self._results[key][1]
                        argv = self._results[key][2]
                        if len(envp) > 0 and len(argv) > 0:
                            break
                # raise an exception if we have been waiting too long
                if time_last_read - time_start > _TIME_MAX_WAIT:
                    raise Exception('Cannot parse logs for %s or they did not'
                            ' appear in %s within %d seconds from printing the'
                            ' document' % (printer_name, _PATH_LOG_FILE,
                            _TIME_MAX_WAIT))
                # wait 1 second and try again to parse acquired logs
                time.sleep(1)
            # retrieve required data
            with self._critical_section:
                if pipeline_required:
                    # key, envp, argv are already set in the loop above
                    filters = self._results[key][3]
                else:
                    key = self._printer_name_to_key[printer_name][0]
                no_errors = self._results[key][0]
                logs = '\n'.join(self._logs[key])
            # build a pipeline script if required
            if pipeline_required:
                pipeline = self._build_pipeline(printer_name, envp, argv,
                        filters, path_ppd, path_doc, path_workdir)
            else:
                pipeline = None
            # return results
            return no_errors, logs, pipeline
        finally:
            # cleanup
            if printer_name in self._printer_name_to_key:
                with self._critical_section:
                    key = self._printer_name_to_key[printer_name].pop(0)
                    if len(self._printer_name_to_key[printer_name]) == 0:
                        self._printer_name_to_key.pop(printer_name)
                    self._logs.pop(key)
                    self._results.pop(key)
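

# Example usage (a minimal sketch; the printer name and the paths below are
# hypothetical and must correspond to a job actually printed via CUPS):
#
#   reader = LogReader()
#   # ... print a document on the printer 'test-printer' here ...
#   no_errors, logs, pipeline = reader.extract_result(
#           'test-printer', path_ppd='/tmp/printer.ppd',
#           path_doc='/tmp/document.pdf', path_workdir='/tmp/workdir')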