# Copyright (C) 2009 Google Inc. All rights reserved.
# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
# Copyright (C) 2010 ProFUSION embedded systems
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Supports reading and processing text files."""

import codecs
import logging
import os
import sys


_log = logging.getLogger(__name__)


class TextFileReader(object):

    """Supports reading and processing text files.

    Attributes:
      file_count: The total number of files passed to this instance
                  for processing, including non-text files and files
                  that should be skipped.
      delete_only_file_count: The total number of files that are not
                              actually processed by this instance
                              because they have no modified lines,
                              but that should be treated as processed.

    """

    def __init__(self, processor):
        """Create an instance.

        Arguments:
          processor: A ProcessorBase instance.  This class calls its
                     should_process(file_path) and
                     process(lines, file_path, **kwargs) methods.

        """
        self._processor = processor
        self.file_count = 0
        self.delete_only_file_count = 0

    def _read_lines(self, file_path):
        """Read the file at a path, and return its lines.

        The contents are decoded as UTF-8, with undecodable bytes
        replaced, and split on "\\n" only.

        Args:
          file_path: The path of the file to read, or "-" for stdin.

        Returns:
          A list of the lines of the file, without trailing "\\n".
          Content that ends in a newline yields a final empty string.

        Raises:
          IOError: If the file does not exist or cannot be read.

        """
        # Support the UNIX convention of using "-" for stdin.
        if file_path == '-':
            # The codecs reader decodes bytes.  On Python 3, sys.stdin is
            # a text stream whose byte stream is sys.stdin.buffer; on
            # Python 2 there is no such attribute and sys.stdin itself
            # already yields bytes.
            raw_stdin = getattr(sys.stdin, 'buffer', sys.stdin)
            stream = codecs.StreamReaderWriter(raw_stdin,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
        else:
            # We do not open the file with universal newline support
            # (codecs does not support it anyway), so the resulting
            # lines contain trailing "\r" characters if we are reading
            # a file with CRLF endings.
            stream = codecs.open(file_path, 'r', 'utf8', 'replace')

        try:
            contents = stream.read()
        finally:
            stream.close()

        return contents.split('\n')

    def process_file(self, file_path, **kwargs):
        """Process the given file by calling the processor's process() method.

        The file is counted in file_count even if it is subsequently
        skipped or cannot be read.

        Args:
          file_path: The path of the file to process, or "-" for stdin.
          **kwargs: Any additional keyword parameters that should be passed
                    to the processor's process() method.  The process()
                    method should support these keyword arguments.

        Raises:
          SystemExit: If no file at file_path exists.

        """
        self.file_count += 1

        if file_path != "-" and not os.path.exists(file_path):
            _log.error("File does not exist: '%s'", file_path)
            sys.exit(1)

        if not self._processor.should_process(file_path):
            _log.debug("Skipping file: '%s'", file_path)
            return
        _log.debug("Processing file: '%s'", file_path)

        try:
            lines = self._read_lines(file_path)
        except IOError as err:
            # An unreadable file is deliberately non-fatal: warn and move
            # on so that the remaining paths still get processed.
            _log.warning("Could not read file. Skipping: '%s'\n %s",
                         file_path, err)
            return

        self._processor.process(lines, file_path, **kwargs)

    def _process_directory(self, directory):
        """Process all files in the given directory, recursively.

        Args:
          directory: A directory path.

        """
        for dir_path, _, file_names in os.walk(directory):
            for file_name in file_names:
                self.process_file(os.path.join(dir_path, file_name))

    def process_paths(self, paths):
        """Process the given file and directory paths.

        Directories are walked recursively; any other path is handed
        to process_file() directly.

        Args:
          paths: A list of file and directory paths.

        """
        for path in paths:
            if os.path.isdir(path):
                self._process_directory(directory=path)
            else:
                self.process_file(path)

    def count_delete_only_file(self):
        """Count a file that contains only deleted lines.

        Files that have no modified or newly-added lines do not need
        to be style-checked, but should be treated as checked.  For that
        purpose, we just count up the number of such files.

        """
        self.delete_only_file_count += 1