# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""File IO methods that wrap the C++ FileSystem API."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import binascii
import os
import uuid

import six

from tensorflow.python.framework import errors
from tensorflow.python.lib.io import _pywrap_file_io
from tensorflow.python.util import compat
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export

# A good default block size depends on the system in question.
# A somewhat conservative default chosen here.
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024


class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.

  The default buffer size used for the BufferedInputStream used for reading
  the file line by line is 1024 * 512 bytes.
  """

  def __init__(self, name, mode):
    self.__name = name
    self.__mode = mode
    # The underlying read/write handles are created lazily by
    # `_preread_check` / `_prewrite_check`; constructing a FileIO does not
    # touch the filesystem.
    self._read_buf = None
    self._writable_file = None
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    # Lazily opens the buffered input stream on first read; raises if the
    # file was not opened with a readable mode.
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    # Lazily opens the writable file on first write; raises if the file was
    # not opened with a writable mode.
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    # Converts the value returned by the C++ layer to bytes or str depending
    # on whether the file was opened in binary mode.
    if self._binary_mode:
      return compat.as_bytes(val)
    else:
      return compat.as_str_any(val)

  def size(self):
    """Returns the size of the file."""
    # Uses the module-level `stat` helper (defined later in this file).
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(compat.as_bytes(file_content))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
    """
    self._preread_check()
    if n == -1:
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
    """
    # pylint: enable=g-doc-args
    # NOTE: seeking only moves the read buffer; writes always append.
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()
      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    return self

  def __next__(self):
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    # Python 2 iterator protocol; delegates to __next__.
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    """Closes FileIO. Should be called for the WritableFile to be flushed."""
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True


@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  """Determines whether a path exists or not.

  Args:
    filename: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
245 False if the path does not exist and there are no filesystem errors. 246 247 Raises: 248 errors.OpError: Propagates any errors reported by the FileSystem API. 249 """ 250 return file_exists_v2(filename) 251 252 253@tf_export("io.gfile.exists") 254def file_exists_v2(path): 255 """Determines whether a path exists or not. 256 257 Args: 258 path: string, a path 259 260 Returns: 261 True if the path exists, whether it's a file or a directory. 262 False if the path does not exist and there are no filesystem errors. 263 264 Raises: 265 errors.OpError: Propagates any errors reported by the FileSystem API. 266 """ 267 try: 268 _pywrap_file_io.FileExists(compat.path_to_bytes(path)) 269 except errors.NotFoundError: 270 return False 271 return True 272 273 274@tf_export(v1=["gfile.Remove"]) 275def delete_file(filename): 276 """Deletes the file located at 'filename'. 277 278 Args: 279 filename: string, a filename 280 281 Raises: 282 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 283 `NotFoundError` if the file does not exist. 284 """ 285 delete_file_v2(filename) 286 287 288@tf_export("io.gfile.remove") 289def delete_file_v2(path): 290 """Deletes the path located at 'path'. 291 292 Args: 293 path: string, a path 294 295 Raises: 296 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 297 `NotFoundError` if the path does not exist. 298 """ 299 _pywrap_file_io.DeleteFile(compat.path_to_bytes(path)) 300 301 302def read_file_to_string(filename, binary_mode=False): 303 """Reads the entire contents of a file to a string. 304 305 Args: 306 filename: string, path to a file 307 binary_mode: whether to open the file in binary mode or not. This changes 308 the type of the object returned. 309 310 Returns: 311 contents of the file as a string or bytes. 312 313 Raises: 314 errors.OpError: Raises variety of errors that are subtypes e.g. 315 `NotFoundError` etc. 
316 """ 317 if binary_mode: 318 f = FileIO(filename, mode="rb") 319 else: 320 f = FileIO(filename, mode="r") 321 return f.read() 322 323 324def write_string_to_file(filename, file_content): 325 """Writes a string to a given file. 326 327 Args: 328 filename: string, path to a file 329 file_content: string, contents that need to be written to the file 330 331 Raises: 332 errors.OpError: If there are errors during the operation. 333 """ 334 with FileIO(filename, mode="w") as f: 335 f.write(file_content) 336 337 338@tf_export(v1=["gfile.Glob"]) 339def get_matching_files(filename): 340 """Returns a list of files that match the given pattern(s). 341 342 Args: 343 filename: string or iterable of strings. The glob pattern(s). 344 345 Returns: 346 A list of strings containing filenames that match the given pattern(s). 347 348 Raises: 349 * errors.OpError: If there are filesystem / directory listing errors. 350 * errors.NotFoundError: If pattern to be matched is an invalid directory. 351 """ 352 return get_matching_files_v2(filename) 353 354 355@tf_export("io.gfile.glob") 356def get_matching_files_v2(pattern): 357 r"""Returns a list of files that match the given pattern(s). 358 359 The patterns are defined as strings. Supported patterns are defined 360 here. Note that the pattern can be a Python iteratable of string patterns. 361 362 The format definition of the pattern is: 363 364 **pattern**: `{ term }` 365 366 **term**: 367 * `'*'`: matches any sequence of non-'/' characters 368 * `'?'`: matches a single non-'/' character 369 * `'[' [ '^' ] { match-list } ']'`: matches any single 370 character (not) on the list 371 * `c`: matches character `c` where `c != '*', '?', '\\', '['` 372 * `'\\' c`: matches character `c` 373 374 **character range**: 375 * `c`: matches character `c` while `c != '\\', '-', ']'` 376 * `'\\' c`: matches character `c` 377 * `lo '-' hi`: matches character `c` for `lo <= c <= hi` 378 379 Examples: 380 381 >>> tf.io.gfile.glob("*.py") 382 ... 
# For example, ['__init__.py'] 383 384 >>> tf.io.gfile.glob("__init__.??") 385 ... # As above 386 387 >>> files = {"*.py"} 388 >>> the_iterator = iter(files) 389 >>> tf.io.gfile.glob(the_iterator) 390 ... # As above 391 392 See the C++ function `GetMatchingPaths` in 393 [`core/platform/file_system.h`] 394 (../../../core/platform/file_system.h) 395 for implementation details. 396 397 Args: 398 pattern: string or iterable of strings. The glob pattern(s). 399 400 Returns: 401 A list of strings containing filenames that match the given pattern(s). 402 403 Raises: 404 errors.OpError: If there are filesystem / directory listing errors. 405 errors.NotFoundError: If pattern to be matched is an invalid directory. 406 """ 407 if isinstance(pattern, six.string_types): 408 return [ 409 # Convert the filenames to string from bytes. 410 compat.as_str_any(matching_filename) 411 for matching_filename in _pywrap_file_io.GetMatchingFiles( 412 compat.as_bytes(pattern)) 413 ] 414 else: 415 return [ 416 # Convert the filenames to string from bytes. 417 compat.as_str_any(matching_filename) # pylint: disable=g-complex-comprehension 418 for single_filename in pattern 419 for matching_filename in _pywrap_file_io.GetMatchingFiles( 420 compat.as_bytes(single_filename)) 421 ] 422 423 424@tf_export(v1=["gfile.MkDir"]) 425def create_dir(dirname): 426 """Creates a directory with the name `dirname`. 427 428 Args: 429 dirname: string, name of the directory to be created 430 431 Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs` 432 instead if there is the possibility that the parent dirs don't exist. 433 434 Raises: 435 errors.OpError: If the operation fails. 436 """ 437 create_dir_v2(dirname) 438 439 440@tf_export("io.gfile.mkdir") 441def create_dir_v2(path): 442 """Creates a directory with the name given by `path`. 443 444 Args: 445 path: string, name of the directory to be created 446 447 Notes: The parent directories need to exist. 
Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CreateDir(compat.path_to_bytes(path))


@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if dirname already exists and is writable.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(dirname)


@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if path already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))


@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  """Copies data from `oldpath` to `newpath`.

  Args:
    oldpath: string, name of the file whose contents need to be copied
    newpath: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `newpath` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  copy_v2(oldpath, newpath, overwrite)


@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CopyFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Rename or move a file / directory.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(oldname, newname, overwrite)


@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Rename or move a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RenameFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes to `filename` atomically.

  This means that when `filename` appears in the filesystem, it will contain
  all of `contents`. With write_string_to_file, it is possible for the file
  to appear in the filesystem with `contents` only partially written.

  Accomplished by writing to a temp file and then renaming it.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
  """
  # If the filesystem cannot rename atomically (e.g. some object stores),
  # fall back to a plain (non-atomic) write.
  if not has_atomic_move(filename):
    write_string_to_file(filename, contents)
  else:
    # Write to a uniquely-named temp file, then rename over the target.
    temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    write_string_to_file(temp_pathname, contents)
    try:
      rename(temp_pathname, filename, overwrite)
    except errors.OpError:
      # Clean up the temp file before propagating the failure.
      delete_file(temp_pathname)
      raise


@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Deletes everything under dirname recursively.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(dirname)


@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Deletes everything under path recursively.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path))


@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Returns whether the path is a directory or not.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(dirname)


@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Returns whether the path is a directory or not.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  try:
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    # Any filesystem error (not just NotFound) is reported as "not a
    # directory" rather than propagated.
    return False


def has_atomic_move(path):
  """Checks whether the file system supports atomic moves.

  Returns whether or not the file system of the given path supports the atomic
  move operation for a file or folder. If atomic move is supported, it is
  recommended to use a temp location for writing and then move to the final
  location.

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move
    False, if the file system does not support atomic move. In such cases
      we need to be careful about using moves. In some cases it is safer
      not to use temporary locations in this case.
  """
  try:
    return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # defaults to True
    return True


@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  return list_directory_v2(dirname)


@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))

  # Convert each element to string, since the return values of the
  # vector of string should be interpreted as strings, not bytes.
  return [
      compat.as_str_any(filename)
      for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  ]


@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False. Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, in_order)


@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function, it will be
      called with the error as argument. Rethrowing the error aborts the walk.
      Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """

  def _make_full_path(parent, item):
    # Since `os.path.join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#os.path.join),
    # we have to manually handle that case as `/` is a valid character on GCS.
747 if item[0] == os.sep: 748 return "".join([os.path.join(parent, ""), item]) 749 return os.path.join(parent, item) 750 751 top = compat.as_str_any(compat.path_to_str(top)) 752 try: 753 listing = list_directory(top) 754 except errors.NotFoundError as err: 755 if onerror: 756 onerror(err) 757 else: 758 return 759 760 files = [] 761 subdirs = [] 762 for item in listing: 763 full_path = _make_full_path(top, item) 764 if is_directory(full_path): 765 subdirs.append(item) 766 else: 767 files.append(item) 768 769 here = (top, subdirs, files) 770 771 if topdown: 772 yield here 773 774 for subdir in subdirs: 775 for subitem in walk_v2( 776 _make_full_path(top, subdir), topdown, onerror=onerror): 777 yield subitem 778 779 if not topdown: 780 yield here 781 782 783@tf_export(v1=["gfile.Stat"]) 784def stat(filename): 785 """Returns file statistics for a given path. 786 787 Args: 788 filename: string, path to a file 789 790 Returns: 791 FileStatistics struct that contains information about the path 792 793 Raises: 794 errors.OpError: If the operation fails. 795 """ 796 return stat_v2(filename) 797 798 799@tf_export("io.gfile.stat") 800def stat_v2(path): 801 """Returns file statistics for a given path. 802 803 Args: 804 path: string, path to a file 805 806 Returns: 807 FileStatistics struct that contains information about the path 808 809 Raises: 810 errors.OpError: If the operation fails. 811 """ 812 return _pywrap_file_io.Stat(compat.path_to_str(path)) 813 814 815def filecmp(filename_a, filename_b): 816 """Compare two files, returning True if they are the same, False otherwise. 817 818 We check size first and return False quickly if the files are different sizes. 819 If they are the same size, we continue to generating a crc for the whole file. 820 821 You might wonder: why not use Python's `filecmp.cmp()` instead? 
The answer is
  that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, and so we here perform a similar comparison with
  the more robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
  """
  # `size()` delegates to stat() and does not open a read handle.
  size_a = FileIO(filename_a, "rb").size()
  size_b = FileIO(filename_b, "rb").size()
  if size_a != size_b:
    return False

  # Size is the same. Do a full check.
  # NOTE: equal crc32 values do not strictly guarantee identical contents,
  # but collisions are vanishingly unlikely for equal-size files.
  crc_a = file_crc32(filename_a)
  crc_b = file_crc32(filename_b)
  return crc_a == crc_b


def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Get the crc32 of the passed file.

  The crc32 of a file can be used for error checking; two files with the same
  crc32 are considered equivalent. Note that the entire file must be read
  to produce the crc32.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the file at once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    chunk = f.read(n=block_size)
    while chunk:
      # Fold each block into the running checksum.
      crc = binascii.crc32(chunk, crc)
      chunk = f.read(n=block_size)
  # Mask to 32 bits for a sign-independent hex representation.
  return hex(crc & 0xFFFFFFFF)