#!/usr/bin/python3
#
# Copyright (c) 2018-2019 Collabora, Ltd.
#
# SPDX-License-Identifier: Apache-2.0
#
# Author(s):    Ryan Pavlik <ryan.pavlik@collabora.com>
"""Utilities for processing files."""

from pathlib import Path


class LinewiseFileProcessor:
    """A base class for code that processes an input file (or file handle) one line at a time.

    Subclasses implement process_line(). While process_line() runs, the
    properties of this class expose the current line, a one-line look-ahead
    (next_line), and the lines already seen in the current file.
    """

    def __init__(self):
        # All lines handed to process_line() so far for the current file,
        # in order; the current line is always the last element.
        self._lines = []
        self._line_num = 0
        self._next_line = None
        self._line = ''
        self._filename = Path()

    @property
    def filename(self):
        """The Path object of the currently processed file."""
        return self._filename

    @property
    def relative_filename(self):
        """The current file's Path relative to the current working directory.

        Raises ValueError (from Path.relative_to) if the file's path is not
        located below the resolved current working directory.
        """
        return self.filename.relative_to(Path('.').resolve())

    @property
    def line(self):
        """The current line, including any trailing whitespace and the line ending."""
        return self._line

    @property
    def line_number(self):
        """Get 1-indexed line number."""
        return self._line_num

    @property
    def line_rstripped(self):
        """The current line without any trailing whitespace, or None if there is no current line."""
        if self.line is None:
            return None
        return self.line.rstrip()

    @property
    def trailing_whitespace(self):
        """The trailing whitespace of the current line that gets removed when accessing line_rstripped.

        Returns None if there is no current line, mirroring line_rstripped.
        """
        stripped = self.line_rstripped
        if stripped is None:
            return None
        return self.line[len(stripped):]

    @property
    def next_line(self):
        """Peek at the next line, if any."""
        return self._next_line

    @property
    def next_line_rstripped(self):
        """Peek at the next line, if any, without any trailing whitespace."""
        if self.next_line is None:
            return None
        return self.next_line.rstrip()

    def get_preceding_line(self, relative_index=-1):
        """Retrieve the line at a line number at the given relative index, if one exists.

        relative_index must be negative: -1 is the line immediately before
        the current one, -2 the one before that, and so on.

        Returns None if there is no line there.
        Raises RuntimeError if relative_index is not negative.
        """
        if relative_index >= 0:
            raise RuntimeError(
                'relativeIndex must be negative, to retrieve a preceding line.')
        current = self.line_number
        if current is None or relative_index + current <= 0:
            # There is no line at this index
            return None
        # Line numbers are 1-indexed while self._lines is 0-indexed.
        return self._lines[current + relative_index - 1]

    def get_preceding_lines(self, num):
        """Get *up to* the preceding num lines.

        Fewer may be returned if the requested number aren't available.
        The current line itself is never included.
        """
        return self._lines[- (num + 1):-1]

    def process_line(self, line_num, line):
        """Implement in your subclass to handle each new line.

        line_num is the 1-indexed line number and line is the raw line text.
        """
        raise NotImplementedError

    def _process_file_handle(self, file_handle):
        """Iterate over file_handle, calling process_line() once per line.

        Processing runs one line behind iteration so that next_line can
        expose a look-ahead while process_line() handles the current line.
        """
        # Start each file with an empty history: otherwise, when a processor
        # is reused, get_preceding_line()/get_preceding_lines() would return
        # lines belonging to a previously processed file.
        self._lines = []

        # These are so we can process one line earlier than we're actually iterating thru.
        processing_line_num = None
        processing_line = None

        def do_process_line():
            self._line_num = processing_line_num
            self._line = processing_line
            # Nothing is pending on the first iteration (or for empty input).
            if processing_line is not None:
                self._lines.append(processing_line)
                self.process_line(processing_line_num, processing_line)

        for line_num, line in enumerate(file_handle, 1):
            self._next_line = line
            do_process_line()
            processing_line_num = line_num
            processing_line = line

        # Finally process the left-over line
        self._next_line = None
        do_process_line()

    def process_file(self, filename, file_handle=None):
        """Main entry point - call with a filename and optionally the file handle to read from.

        A str filename is converted to a resolved Path; any other object is
        stored as given. If file_handle is None, the file is opened for
        reading as UTF-8 text; otherwise lines are read from file_handle.
        """
        if isinstance(filename, str):
            filename = Path(filename).resolve()

        self._filename = filename

        # Compare against None rather than relying on truthiness, so an
        # empty-but-valid iterable of lines is not mistaken for "no handle".
        if file_handle is not None:
            self._process_file_handle(file_handle)
        else:
            with self._filename.open('r', encoding='utf-8') as f:
                self._process_file_handle(f)