1# Copyright 2003 Dave Abrahams 2# Copyright 2001, 2002 Vladimir Prus 3# Copyright 2012 Jurko Gospodnetic 4# Distributed under the Boost Software License, Version 1.0. 5# (See accompanying file LICENSE_1_0.txt or copy at 6# http://www.boost.org/LICENSE_1_0.txt) 7 8############################################################################### 9# 10# Based in part on an old Subversion tree.py source file (tools for comparing 11# directory trees). See http://subversion.tigris.org for more information. 12# 13# Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved. 14# 15# This software is licensed as described in the file COPYING, which you should 16# have received as part of this distribution. The terms are also available at 17# http://subversion.tigris.org/license-1.html. If newer versions of this 18# license are posted there, you may use a newer version instead, at your 19# option. 20# 21############################################################################### 22 23from __future__ import print_function 24 25import os 26import os.path 27import stat 28import sys 29 30 31class TreeNode: 32 """ 33 Fundamental data type used to build file system tree structures. 34 35 If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN 36 is a list of the nodes representing that directory's children. 37 38 NAME is simply the name of the file or directory. CONTENTS is a string 39 holding the file's contents (if a file). 40 41 """ 42 43 def __init__(self, name, children=None, contents=None): 44 assert children is None or contents is None 45 self.name = name 46 self.mtime = 0 47 self.children = children 48 self.contents = contents 49 self.path = name 50 51 def add_child(self, newchild): 52 assert not self.is_file() 53 for a in self.children: 54 if a.name == newchild.name: 55 if newchild.is_file(): 56 a.contents = newchild.contents 57 a.path = os.path.join(self.path, newchild.name) 58 else: 59 for i in newchild.children: 60 a.add_child(i) 61 break 62 else: 63 self.children.append(newchild) 64 newchild.path = os.path.join(self.path, newchild.name) 65 66 def get_child(self, name): 67 """ 68 If the given TreeNode directory NODE contains a child named NAME, 69 return the child; else, return None. 70 71 """ 72 for n in self.children: 73 if n.name == name: 74 return n 75 76 def is_file(self): 77 return self.children is None 78 79 def pprint(self): 80 print(" * Node name: %s" % self.name) 81 print(" Path: %s" % self.path) 82 print(" Contents: %s" % self.contents) 83 if self.is_file(): 84 print(" Children: is a file.") 85 else: 86 print(" Children: %d" % len(self.children)) 87 88 89class TreeDifference: 90 def __init__(self): 91 self.added_files = [] 92 self.removed_files = [] 93 self.modified_files = [] 94 self.touched_files = [] 95 96 def append(self, other): 97 self.added_files.extend(other.added_files) 98 self.removed_files.extend(other.removed_files) 99 self.modified_files.extend(other.modified_files) 100 self.touched_files.extend(other.touched_files) 101 102 def ignore_directories(self): 103 """Removes directories from our lists of found differences.""" 104 not_dir = lambda x : x[-1] != "/" 105 self.added_files = list(filter(not_dir, self.added_files)) 106 self.removed_files = list(filter(not_dir, self.removed_files)) 107 self.modified_files = list(filter(not_dir, self.modified_files)) 108 self.touched_files = list(filter(not_dir, self.touched_files)) 109 110 def pprint(self, file=sys.stdout): 111 file.write("Added files : %s\n" % self.added_files) 112 file.write("Removed files : %s\n" % self.removed_files) 113 file.write("Modified files: %s\n" % self.modified_files) 114 file.write("Touched files : %s\n" % self.touched_files) 115 116 def empty(self): 117 return not (self.added_files or self.removed_files or 118 self.modified_files or self.touched_files) 119 120 121def build_tree(path): 122 """ 123 Takes PATH as the folder path, walks the file system below that path, and 124 creates a tree structure based on any files and folders found there. 125 Returns the prepared tree structure plus the maximum file modification 126 timestamp under the given folder. 127 128 """ 129 return _handle_dir(os.path.normpath(path)) 130 131 132def tree_difference(a, b): 133 """Compare TreeNodes A and B, and create a TreeDifference instance.""" 134 return _do_tree_difference(a, b, "", True) 135 136 137def _do_tree_difference(a, b, parent_path, root=False): 138 """Internal recursive worker function for tree_difference().""" 139 140 # We do not want to list root node names. 141 if root: 142 assert not parent_path 143 assert not a.is_file() 144 assert not b.is_file() 145 full_path = "" 146 else: 147 assert a.name == b.name 148 full_path = parent_path + a.name 149 result = TreeDifference() 150 151 # A and B are both files. 152 if a.is_file() and b.is_file(): 153 if a.contents != b.contents: 154 result.modified_files.append(full_path) 155 elif a.mtime != b.mtime: 156 result.touched_files.append(full_path) 157 return result 158 159 # Directory converted to file. 160 if not a.is_file() and b.is_file(): 161 result.removed_files.extend(_traverse_tree(a, parent_path)) 162 result.added_files.append(full_path) 163 164 # File converted to directory. 165 elif a.is_file() and not b.is_file(): 166 result.removed_files.append(full_path) 167 result.added_files.extend(_traverse_tree(b, parent_path)) 168 169 # A and B are both directories. 170 else: 171 if full_path: 172 full_path += "/" 173 accounted_for = [] # Children present in both trees. 174 for a_child in a.children: 175 b_child = b.get_child(a_child.name) 176 if b_child: 177 accounted_for.append(b_child) 178 result.append(_do_tree_difference(a_child, b_child, full_path)) 179 else: 180 result.removed_files.append(full_path + a_child.name) 181 for b_child in b.children: 182 if b_child not in accounted_for: 183 result.added_files.extend(_traverse_tree(b_child, full_path)) 184 185 return result 186 187 188def _traverse_tree(t, parent_path): 189 """Returns a list of all names in a tree.""" 190 assert not parent_path or parent_path[-1] == "/" 191 full_node_name = parent_path + t.name 192 if t.is_file(): 193 result = [full_node_name] 194 else: 195 name_prefix = full_node_name + "/" 196 result = [name_prefix] 197 for i in t.children: 198 result.extend(_traverse_tree(i, name_prefix)) 199 return result 200 201 202def _get_text(path): 203 """Return a string with the textual contents of a file at PATH.""" 204 fp = open(path, 'rb') 205 try: 206 return fp.read() 207 finally: 208 fp.close() 209 210 211def _handle_dir(path): 212 """ 213 Main recursive worker function for build_tree(). Returns a newly created 214 tree node representing the given normalized folder path as well as the 215 maximum file/folder modification time detected under the same path. 216 217 """ 218 files = [] 219 dirs = [] 220 node = TreeNode(os.path.basename(path), children=[]) 221 max_mtime = node.mtime = os.stat(path).st_mtime 222 223 # List files & folders. 224 for f in os.listdir(path): 225 f = os.path.join(path, f) 226 if os.path.isdir(f): 227 dirs.append(f) 228 elif os.path.isfile(f): 229 files.append(f) 230 231 # Add a child node for each file. 232 for f in files: 233 fcontents = _get_text(f) 234 new_file_node = TreeNode(os.path.basename(f), contents=fcontents) 235 new_file_node.mtime = os.stat(f).st_mtime 236 max_mtime = max(max_mtime, new_file_node.mtime) 237 node.add_child(new_file_node) 238 239 # For each subdir, create a node, walk its tree, add it as a child. 240 for d in dirs: 241 new_dir_node, new_max_mtime = _handle_dir(d) 242 max_mtime = max(max_mtime, new_max_mtime) 243 node.add_child(new_dir_node) 244 245 return node, max_mtime 246