#!/usr/bin/python3
#
# Copyright (c) 2018-2019 Collabora, Ltd.
#
# SPDX-License-Identifier: Apache-2.0
#
# Author(s):    Ryan Pavlik
"Utilities for processing files."

from pathlib import Path


class LinewiseFileProcessor:
    """A base class for code that processes an input file (or file handle) one line at a time.

    Subclasses implement process_line(). While it runs, the properties of
    this class expose the current line, a one-line look-ahead (next_line),
    and the lines already seen in the current file.
    """

    def __init__(self):
        # Lines of the current file seen so far; the last entry is the
        # line currently being processed.
        self._lines = []
        self._line_num = 0
        self._next_line = None
        self._line = ''
        self._filename = Path()

    @property
    def filename(self):
        """The Path object of the currently processed file"""
        return self._filename

    @property
    def relative_filename(self):
        """The current file's Path relative to the current working directory.

        Raises ValueError (from Path.relative_to) if the file is not located
        below the current working directory.
        """
        return self.filename.relative_to(Path('.').resolve())

    @property
    def line(self):
        """The current line, including any trailing whitespace and the line ending."""
        return self._line

    @property
    def line_number(self):
        """Get 1-indexed line number."""
        return self._line_num

    @property
    def line_rstripped(self):
        """The current line without any trailing whitespace, or None if there is no current line."""
        if self.line is None:
            return None
        return self.line.rstrip()

    @property
    def trailing_whitespace(self):
        """The trailing whitespace of the current line that gets removed
        when accessing line_rstripped.

        Returns None if there is no current line.
        """
        stripped = self.line_rstripped
        if stripped is None:
            # No current line (e.g. after processing an empty input).
            return None
        return self.line[len(stripped):]

    @property
    def next_line(self):
        """Peek at the next line, if any."""
        return self._next_line

    @property
    def next_line_rstripped(self):
        """Peek at the next line, if any, without any trailing whitespace."""
        if self.next_line is None:
            return None
        return self.next_line.rstrip()

    def get_preceding_line(self, relative_index=-1):
        """Retrieve the line at a line number at the given relative index, if one exists.

        relative_index must be negative: -1 is the line directly before the
        current one, -2 the one before that, and so on.

        Returns None if there is no line there.
        Raises RuntimeError if relative_index is not negative.
        """
        if relative_index >= 0:
            raise RuntimeError(
                'relativeIndex must be negative, to retrieve a preceding line.')
        current = self.line_number
        if current is None or relative_index + current <= 0:
            # There is no line at this index (or no current line at all).
            return None
        # Line numbers are 1-indexed, self._lines is 0-indexed.
        return self._lines[current + relative_index - 1]

    def get_preceding_lines(self, num):
        """Get *up to* the preceding num lines.

        Fewer may be returned if the requested number aren't available.
        A num of zero or less yields an empty list.
        """
        if num <= 0:
            return []
        # The last entry of self._lines is the current line: exclude it.
        return self._lines[-(num + 1):-1]

    def process_line(self, line_num, line):
        """Implement in your subclass to handle each new line."""
        raise NotImplementedError

    def _process_file_handle(self, file_handle):
        """Feed every line of file_handle (any iterable of lines) to process_line.

        Processing runs one line behind the iteration so that next_line is
        already known while a line is being handled.
        """
        # Start from a clean slate so a processor instance can be re-used
        # for several files without earlier lines leaking into
        # get_preceding_line() / get_preceding_lines().
        self._lines = []

        def do_process_line(pending_num, pending_line):
            self._line_num = pending_num
            self._line = pending_line
            if pending_line is not None:
                self._lines.append(pending_line)
                self.process_line(pending_num, pending_line)

        # These are so we can process one line earlier than we're actually
        # iterating through.
        processing_line_num = None
        processing_line = None

        for line_num, line in enumerate(file_handle, 1):
            self._next_line = line
            do_process_line(processing_line_num, processing_line)
            processing_line_num = line_num
            processing_line = line

        # Finally process the left-over line, which has no successor.
        self._next_line = None
        do_process_line(processing_line_num, processing_line)

    def process_file(self, filename, file_handle=None):
        """Main entry point - call with a filename and optionally the file handle to read from.

        filename may be a str (resolved to an absolute Path) or a Path
        (used as-is). If file_handle is None, the file is opened as UTF-8
        text; otherwise lines are read from file_handle and the file itself
        is not opened.
        """
        if isinstance(filename, str):
            filename = Path(filename).resolve()

        self._filename = filename

        # Compare against None rather than testing truthiness, so that an
        # explicitly supplied but empty/falsy handle is still honoured.
        if file_handle is not None:
            self._process_file_handle(file_handle)
        else:
            with self._filename.open('r', encoding='utf-8') as f:
                self._process_file_handle(f)