1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""File IO methods that wrap the C++ FileSystem API.""" 16from __future__ import absolute_import 17from __future__ import division 18from __future__ import print_function 19 20import binascii 21import os 22import uuid 23 24import six 25 26from tensorflow.python.framework import errors 27from tensorflow.python.lib.io import _pywrap_file_io 28from tensorflow.python.util import compat 29from tensorflow.python.util import deprecation 30from tensorflow.python.util.tf_export import tf_export 31 32# A good default block size depends on the system in question. 33# A somewhat conservative default chosen here. 34_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024 35 36 37class FileIO(object): 38 """FileIO class that exposes methods to read / write to / from files. 39 40 The constructor takes the following arguments: 41 name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object) 42 giving the pathname of the file to be opened. 43 mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode. 44 45 Can be used as an iterator to iterate over lines in the file. 46 47 The default buffer size used for the BufferedInputStream used for reading 48 the file line by line is 1024 * 512 bytes. 49 """ 50 51 def __init__(self, name, mode): 52 self.__name = name 53 self.__mode = mode 54 self._read_buf = None 55 self._writable_file = None 56 self._binary_mode = "b" in mode 57 mode = mode.replace("b", "") 58 if mode not in ("r", "w", "a", "r+", "w+", "a+"): 59 raise errors.InvalidArgumentError( 60 None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'") 61 self._read_check_passed = mode in ("r", "r+", "a+", "w+") 62 self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+") 63 64 @property 65 def name(self): 66 """Returns the file name.""" 67 return self.__name 68 69 @property 70 def mode(self): 71 """Returns the mode in which the file was opened.""" 72 return self.__mode 73 74 def _preread_check(self): 75 if not self._read_buf: 76 if not self._read_check_passed: 77 raise errors.PermissionDeniedError(None, None, 78 "File isn't open for reading") 79 self._read_buf = _pywrap_file_io.BufferedInputStream( 80 compat.path_to_str(self.__name), 1024 * 512) 81 82 def _prewrite_check(self): 83 if not self._writable_file: 84 if not self._write_check_passed: 85 raise errors.PermissionDeniedError(None, None, 86 "File isn't open for writing") 87 self._writable_file = _pywrap_file_io.WritableFile( 88 compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode)) 89 90 def _prepare_value(self, val): 91 if self._binary_mode: 92 return compat.as_bytes(val) 93 else: 94 return compat.as_str_any(val) 95 96 def size(self): 97 """Returns the size of the file.""" 98 return stat(self.__name).length 99 100 def write(self, file_content): 101 """Writes file_content to the file. Appends to the end of the file.""" 102 self._prewrite_check() 103 self._writable_file.append(compat.as_bytes(file_content)) 104 105 def read(self, n=-1): 106 """Returns the contents of a file as a string. 107 108 Starts reading from current position in file. 109 110 Args: 111 n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file. 112 113 Returns: 114 `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the 115 string if in string (regular) mode. 116 """ 117 self._preread_check() 118 if n == -1: 119 length = self.size() - self.tell() 120 else: 121 length = n 122 return self._prepare_value(self._read_buf.read(length)) 123 124 @deprecation.deprecated_args( 125 None, "position is deprecated in favor of the offset argument.", 126 "position") 127 def seek(self, offset=None, whence=0, position=None): 128 # TODO(jhseu): Delete later. Used to omit `position` from docs. 129 # pylint: disable=g-doc-args 130 """Seeks to the offset in the file. 131 132 Args: 133 offset: The byte count relative to the whence argument. 134 whence: Valid values for whence are: 135 0: start of the file (default) 136 1: relative to the current position of the file 137 2: relative to the end of file. `offset` is usually negative. 138 """ 139 # pylint: enable=g-doc-args 140 self._preread_check() 141 # We needed to make offset a keyword argument for backwards-compatibility. 142 # This check exists so that we can convert back to having offset be a 143 # positional argument. 144 # TODO(jhseu): Make `offset` a positional argument after `position` is 145 # deleted. 146 if offset is None and position is None: 147 raise TypeError("seek(): offset argument required") 148 if offset is not None and position is not None: 149 raise TypeError("seek(): offset and position may not be set " 150 "simultaneously.") 151 152 if position is not None: 153 offset = position 154 155 if whence == 0: 156 pass 157 elif whence == 1: 158 offset += self.tell() 159 elif whence == 2: 160 offset += self.size() 161 else: 162 raise errors.InvalidArgumentError( 163 None, None, 164 "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format( 165 whence)) 166 self._read_buf.seek(offset) 167 168 def readline(self): 169 r"""Reads the next line, keeping \n. At EOF, returns ''.""" 170 self._preread_check() 171 return self._prepare_value(self._read_buf.readline()) 172 173 def readlines(self): 174 """Returns all lines from the file in a list.""" 175 self._preread_check() 176 lines = [] 177 while True: 178 s = self.readline() 179 if not s: 180 break 181 lines.append(s) 182 return lines 183 184 def tell(self): 185 """Returns the current position in the file.""" 186 if self._read_check_passed: 187 self._preread_check() 188 return self._read_buf.tell() 189 else: 190 self._prewrite_check() 191 192 return self._writable_file.tell() 193 194 def __enter__(self): 195 """Make usable with "with" statement.""" 196 return self 197 198 def __exit__(self, unused_type, unused_value, unused_traceback): 199 """Make usable with "with" statement.""" 200 self.close() 201 202 def __iter__(self): 203 return self 204 205 def __next__(self): 206 retval = self.readline() 207 if not retval: 208 raise StopIteration() 209 return retval 210 211 def next(self): 212 return self.__next__() 213 214 def flush(self): 215 """Flushes the Writable file. 216 217 This only ensures that the data has made its way out of the process without 218 any guarantees on whether it's written to disk. This means that the 219 data would survive an application crash but not necessarily an OS crash. 220 """ 221 if self._writable_file: 222 self._writable_file.flush() 223 224 def close(self): 225 r"""Closes the file. 226 227 Should be called for the WritableFile to be flushed. 228 229 In general, if you use the context manager pattern, you don't need to call 230 this directly. 231 232 >>> with tf.io.gfile.GFile("/tmp/x", "w") as f: 233 ... f.write("asdf\n") 234 ... f.write("qwer\n") 235 >>> # implicit f.close() at the end of the block 236 237 For cloud filesystems, forgetting to call `close()` might result in data 238 loss as last write might not have been replicated. 239 """ 240 self._read_buf = None 241 if self._writable_file: 242 self._writable_file.close() 243 self._writable_file = None 244 245 def seekable(self): 246 """Returns True as FileIO supports random access ops of seek()/tell()""" 247 return True 248 249 250@tf_export("io.gfile.exists") 251def file_exists_v2(path): 252 """Determines whether a path exists or not. 253 254 >>> with open("/tmp/x", "w") as f: 255 ... f.write("asdf") 256 ... 257 4 258 >>> tf.io.gfile.exists("/tmp/x") 259 True 260 261 You can also specify the URI scheme for selecting a different filesystem: 262 263 >>> # for a GCS filesystem path: 264 >>> # tf.io.gfile.exists("gs://bucket/file") 265 >>> # for a local filesystem: 266 >>> with open("/tmp/x", "w") as f: 267 ... f.write("asdf") 268 ... 269 4 270 >>> tf.io.gfile.exists("file:///tmp/x") 271 True 272 273 This currently returns `True` for existing directories but don't rely on this 274 behavior, especially if you are using cloud filesystems (e.g., GCS, S3, 275 Hadoop): 276 277 >>> tf.io.gfile.exists("/tmp") 278 True 279 280 Args: 281 path: string, a path 282 283 Returns: 284 True if the path exists, whether it's a file or a directory. 285 False if the path does not exist and there are no filesystem errors. 286 287 Raises: 288 errors.OpError: Propagates any errors reported by the FileSystem API. 289 """ 290 try: 291 _pywrap_file_io.FileExists(compat.path_to_bytes(path)) 292 except errors.NotFoundError: 293 return False 294 return True 295 296 297@tf_export(v1=["gfile.Exists"]) 298def file_exists(filename): 299 return file_exists_v2(filename) 300 301 302file_exists.__doc__ = file_exists_v2.__doc__ 303 304 305@tf_export(v1=["gfile.Remove"]) 306def delete_file(filename): 307 """Deletes the file located at 'filename'. 308 309 Args: 310 filename: string, a filename 311 312 Raises: 313 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 314 `NotFoundError` if the file does not exist. 315 """ 316 delete_file_v2(filename) 317 318 319@tf_export("io.gfile.remove") 320def delete_file_v2(path): 321 """Deletes the path located at 'path'. 322 323 Args: 324 path: string, a path 325 326 Raises: 327 errors.OpError: Propagates any errors reported by the FileSystem API. E.g., 328 `NotFoundError` if the path does not exist. 329 """ 330 _pywrap_file_io.DeleteFile(compat.path_to_bytes(path)) 331 332 333def read_file_to_string(filename, binary_mode=False): 334 """Reads the entire contents of a file to a string. 335 336 Args: 337 filename: string, path to a file 338 binary_mode: whether to open the file in binary mode or not. This changes 339 the type of the object returned. 340 341 Returns: 342 contents of the file as a string or bytes. 343 344 Raises: 345 errors.OpError: Raises variety of errors that are subtypes e.g. 346 `NotFoundError` etc. 347 """ 348 if binary_mode: 349 f = FileIO(filename, mode="rb") 350 else: 351 f = FileIO(filename, mode="r") 352 return f.read() 353 354 355def write_string_to_file(filename, file_content): 356 """Writes a string to a given file. 357 358 Args: 359 filename: string, path to a file 360 file_content: string, contents that need to be written to the file 361 362 Raises: 363 errors.OpError: If there are errors during the operation. 364 """ 365 with FileIO(filename, mode="w") as f: 366 f.write(file_content) 367 368 369@tf_export(v1=["gfile.Glob"]) 370def get_matching_files(filename): 371 """Returns a list of files that match the given pattern(s). 372 373 Args: 374 filename: string or iterable of strings. The glob pattern(s). 375 376 Returns: 377 A list of strings containing filenames that match the given pattern(s). 378 379 Raises: 380 * errors.OpError: If there are filesystem / directory listing errors. 381 * errors.NotFoundError: If pattern to be matched is an invalid directory. 382 """ 383 return get_matching_files_v2(filename) 384 385 386@tf_export("io.gfile.glob") 387def get_matching_files_v2(pattern): 388 r"""Returns a list of files that match the given pattern(s). 389 390 The patterns are defined as strings. Supported patterns are defined 391 here. Note that the pattern can be a Python iteratable of string patterns. 392 393 The format definition of the pattern is: 394 395 **pattern**: `{ term }` 396 397 **term**: 398 * `'*'`: matches any sequence of non-'/' characters 399 * `'?'`: matches a single non-'/' character 400 * `'[' [ '^' ] { match-list } ']'`: matches any single 401 character (not) on the list 402 * `c`: matches character `c` where `c != '*', '?', '\\', '['` 403 * `'\\' c`: matches character `c` 404 405 **character range**: 406 * `c`: matches character `c` while `c != '\\', '-', ']'` 407 * `'\\' c`: matches character `c` 408 * `lo '-' hi`: matches character `c` for `lo <= c <= hi` 409 410 Examples: 411 412 >>> tf.io.gfile.glob("*.py") 413 ... # For example, ['__init__.py'] 414 415 >>> tf.io.gfile.glob("__init__.??") 416 ... # As above 417 418 >>> files = {"*.py"} 419 >>> the_iterator = iter(files) 420 >>> tf.io.gfile.glob(the_iterator) 421 ... # As above 422 423 See the C++ function `GetMatchingPaths` in 424 [`core/platform/file_system.h`] 425 (../../../core/platform/file_system.h) 426 for implementation details. 427 428 Args: 429 pattern: string or iterable of strings. The glob pattern(s). 430 431 Returns: 432 A list of strings containing filenames that match the given pattern(s). 433 434 Raises: 435 errors.OpError: If there are filesystem / directory listing errors. 436 errors.NotFoundError: If pattern to be matched is an invalid directory. 437 """ 438 if isinstance(pattern, six.string_types): 439 return [ 440 # Convert the filenames to string from bytes. 441 compat.as_str_any(matching_filename) 442 for matching_filename in _pywrap_file_io.GetMatchingFiles( 443 compat.as_bytes(pattern)) 444 ] 445 else: 446 return [ 447 # Convert the filenames to string from bytes. 448 compat.as_str_any(matching_filename) # pylint: disable=g-complex-comprehension 449 for single_filename in pattern 450 for matching_filename in _pywrap_file_io.GetMatchingFiles( 451 compat.as_bytes(single_filename)) 452 ] 453 454 455@tf_export(v1=["gfile.MkDir"]) 456def create_dir(dirname): 457 """Creates a directory with the name `dirname`. 458 459 Args: 460 dirname: string, name of the directory to be created 461 462 Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs` 463 instead if there is the possibility that the parent dirs don't exist. 464 465 Raises: 466 errors.OpError: If the operation fails. 467 """ 468 create_dir_v2(dirname) 469 470 471@tf_export("io.gfile.mkdir") 472def create_dir_v2(path): 473 """Creates a directory with the name given by `path`. 474 475 Args: 476 path: string, name of the directory to be created 477 478 Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs` 479 instead if there is the possibility that the parent dirs don't exist. 480 481 Raises: 482 errors.OpError: If the operation fails. 483 """ 484 _pywrap_file_io.CreateDir(compat.path_to_bytes(path)) 485 486 487@tf_export(v1=["gfile.MakeDirs"]) 488def recursive_create_dir(dirname): 489 """Creates a directory and all parent/intermediate directories. 490 491 It succeeds if dirname already exists and is writable. 492 493 Args: 494 dirname: string, name of the directory to be created 495 496 Raises: 497 errors.OpError: If the operation fails. 498 """ 499 recursive_create_dir_v2(dirname) 500 501 502@tf_export("io.gfile.makedirs") 503def recursive_create_dir_v2(path): 504 """Creates a directory and all parent/intermediate directories. 505 506 It succeeds if path already exists and is writable. 507 508 Args: 509 path: string, name of the directory to be created 510 511 Raises: 512 errors.OpError: If the operation fails. 513 """ 514 _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path)) 515 516 517@tf_export("io.gfile.copy") 518def copy_v2(src, dst, overwrite=False): 519 """Copies data from `src` to `dst`. 520 521 >>> with open("/tmp/x", "w") as f: 522 ... f.write("asdf") 523 ... 524 4 525 >>> tf.io.gfile.exists("/tmp/x") 526 True 527 >>> tf.io.gfile.copy("/tmp/x", "/tmp/y") 528 >>> tf.io.gfile.exists("/tmp/y") 529 True 530 >>> tf.io.gfile.remove("/tmp/y") 531 532 You can also specify the URI scheme for selecting a different filesystem: 533 534 >>> with open("/tmp/x", "w") as f: 535 ... f.write("asdf") 536 ... 537 4 538 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y") 539 >>> tf.io.gfile.exists("/tmp/y") 540 True 541 >>> tf.io.gfile.remove("/tmp/y") 542 543 Note that you need to always specify a file name, even if moving into a new 544 directory. This is because some cloud filesystems don't have the concept of a 545 directory. 546 547 >>> with open("/tmp/x", "w") as f: 548 ... f.write("asdf") 549 ... 550 4 551 >>> tf.io.gfile.mkdir("/tmp/new_dir") 552 >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y") 553 >>> tf.io.gfile.exists("/tmp/new_dir/y") 554 True 555 >>> tf.io.gfile.rmtree("/tmp/new_dir") 556 557 If you want to prevent errors if the path already exists, you can use 558 `overwrite` argument: 559 560 >>> with open("/tmp/x", "w") as f: 561 ... f.write("asdf") 562 ... 563 4 564 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y") 565 >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True) 566 >>> tf.io.gfile.remove("/tmp/y") 567 568 Note that the above will still result in an error if you try to overwrite a 569 directory with a file. 570 571 Note that you cannot copy a directory, only file arguments are supported. 572 573 Args: 574 src: string, name of the file whose contents need to be copied 575 dst: string, name of the file to which to copy to 576 overwrite: boolean, if false it's an error for `dst` to be occupied by an 577 existing file. 578 579 Raises: 580 errors.OpError: If the operation fails. 581 """ 582 _pywrap_file_io.CopyFile( 583 compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) 584 585 586@tf_export(v1=["gfile.Copy"]) 587def copy(oldpath, newpath, overwrite=False): 588 copy_v2(oldpath, newpath, overwrite) 589 590 591copy.__doc__ = copy_v2.__doc__ 592 593 594@tf_export(v1=["gfile.Rename"]) 595def rename(oldname, newname, overwrite=False): 596 """Rename or move a file / directory. 597 598 Args: 599 oldname: string, pathname for a file 600 newname: string, pathname to which the file needs to be moved 601 overwrite: boolean, if false it's an error for `newname` to be occupied by 602 an existing file. 603 604 Raises: 605 errors.OpError: If the operation fails. 606 """ 607 rename_v2(oldname, newname, overwrite) 608 609 610@tf_export("io.gfile.rename") 611def rename_v2(src, dst, overwrite=False): 612 """Rename or move a file / directory. 613 614 Args: 615 src: string, pathname for a file 616 dst: string, pathname to which the file needs to be moved 617 overwrite: boolean, if false it's an error for `dst` to be occupied by an 618 existing file. 619 620 Raises: 621 errors.OpError: If the operation fails. 622 """ 623 _pywrap_file_io.RenameFile( 624 compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) 625 626 627def atomic_write_string_to_file(filename, contents, overwrite=True): 628 """Writes to `filename` atomically. 629 630 This means that when `filename` appears in the filesystem, it will contain 631 all of `contents`. With write_string_to_file, it is possible for the file 632 to appear in the filesystem with `contents` only partially written. 633 634 Accomplished by writing to a temp file and then renaming it. 635 636 Args: 637 filename: string, pathname for a file 638 contents: string, contents that need to be written to the file 639 overwrite: boolean, if false it's an error for `filename` to be occupied by 640 an existing file. 641 """ 642 if not has_atomic_move(filename): 643 write_string_to_file(filename, contents) 644 else: 645 temp_pathname = filename + ".tmp" + uuid.uuid4().hex 646 write_string_to_file(temp_pathname, contents) 647 try: 648 rename(temp_pathname, filename, overwrite) 649 except errors.OpError: 650 delete_file(temp_pathname) 651 raise 652 653 654@tf_export(v1=["gfile.DeleteRecursively"]) 655def delete_recursively(dirname): 656 """Deletes everything under dirname recursively. 657 658 Args: 659 dirname: string, a path to a directory 660 661 Raises: 662 errors.OpError: If the operation fails. 663 """ 664 delete_recursively_v2(dirname) 665 666 667@tf_export("io.gfile.rmtree") 668def delete_recursively_v2(path): 669 """Deletes everything under path recursively. 670 671 Args: 672 path: string, a path 673 674 Raises: 675 errors.OpError: If the operation fails. 676 """ 677 _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path)) 678 679 680@tf_export(v1=["gfile.IsDirectory"]) 681def is_directory(dirname): 682 """Returns whether the path is a directory or not. 683 684 Args: 685 dirname: string, path to a potential directory 686 687 Returns: 688 True, if the path is a directory; False otherwise 689 """ 690 return is_directory_v2(dirname) 691 692 693@tf_export("io.gfile.isdir") 694def is_directory_v2(path): 695 """Returns whether the path is a directory or not. 696 697 Args: 698 path: string, path to a potential directory 699 700 Returns: 701 True, if the path is a directory; False otherwise 702 """ 703 try: 704 return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path)) 705 except errors.OpError: 706 return False 707 708 709def has_atomic_move(path): 710 """Checks whether the file system supports atomic moves. 711 712 Returns whether or not the file system of the given path supports the atomic 713 move operation for a file or folder. If atomic move is supported, it is 714 recommended to use a temp location for writing and then move to the final 715 location. 716 717 Args: 718 path: string, path to a file 719 720 Returns: 721 True, if the path is on a file system that supports atomic move 722 False, if the file system does not support atomic move. In such cases 723 we need to be careful about using moves. In some cases it is safer 724 not to use temporary locations in this case. 725 """ 726 try: 727 return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path)) 728 except errors.OpError: 729 # defaults to True 730 return True 731 732 733@tf_export(v1=["gfile.ListDirectory"]) 734def list_directory(dirname): 735 """Returns a list of entries contained within a directory. 736 737 The list is in arbitrary order. It does not contain the special entries "." 738 and "..". 739 740 Args: 741 dirname: string, path to a directory 742 743 Returns: 744 [filename1, filename2, ... filenameN] as strings 745 746 Raises: 747 errors.NotFoundError if directory doesn't exist 748 """ 749 return list_directory_v2(dirname) 750 751 752@tf_export("io.gfile.listdir") 753def list_directory_v2(path): 754 """Returns a list of entries contained within a directory. 755 756 The list is in arbitrary order. It does not contain the special entries "." 757 and "..". 758 759 Args: 760 path: string, path to a directory 761 762 Returns: 763 [filename1, filename2, ... filenameN] as strings 764 765 Raises: 766 errors.NotFoundError if directory doesn't exist 767 """ 768 if not is_directory(path): 769 raise errors.NotFoundError( 770 node_def=None, 771 op=None, 772 message="Could not find directory {}".format(path)) 773 774 # Convert each element to string, since the return values of the 775 # vector of string should be interpreted as strings, not bytes. 776 return [ 777 compat.as_str_any(filename) 778 for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path)) 779 ] 780 781 782@tf_export(v1=["gfile.Walk"]) 783def walk(top, in_order=True): 784 """Recursive directory tree generator for directories. 785 786 Args: 787 top: string, a Directory name 788 in_order: bool, Traverse in order if True, post order if False. Errors that 789 happen while listing directories are ignored. 790 791 Yields: 792 Each yield is a 3-tuple: the pathname of a directory, followed by lists of 793 all its subdirectories and leaf files. That is, each yield looks like: 794 `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`. 795 Each item is a string. 796 """ 797 return walk_v2(top, in_order) 798 799 800@tf_export("io.gfile.walk") 801def walk_v2(top, topdown=True, onerror=None): 802 """Recursive directory tree generator for directories. 803 804 Args: 805 top: string, a Directory name 806 topdown: bool, Traverse pre order if True, post order if False. 807 onerror: optional handler for errors. Should be a function, it will be 808 called with the error as argument. Rethrowing the error aborts the walk. 809 Errors that happen while listing directories are ignored. 810 811 Yields: 812 Each yield is a 3-tuple: the pathname of a directory, followed by lists of 813 all its subdirectories and leaf files. That is, each yield looks like: 814 `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`. 815 Each item is a string. 816 """ 817 818 def _make_full_path(parent, item): 819 # Since `os.path.join` discards paths before one that starts with the path 820 # separator (https://docs.python.org/3/library/os.path.html#os.path.join), 821 # we have to manually handle that case as `/` is a valid character on GCS. 822 if item[0] == os.sep: 823 return "".join([os.path.join(parent, ""), item]) 824 return os.path.join(parent, item) 825 826 top = compat.as_str_any(compat.path_to_str(top)) 827 try: 828 listing = list_directory(top) 829 except errors.NotFoundError as err: 830 if onerror: 831 onerror(err) 832 else: 833 return 834 835 files = [] 836 subdirs = [] 837 for item in listing: 838 full_path = _make_full_path(top, item) 839 if is_directory(full_path): 840 subdirs.append(item) 841 else: 842 files.append(item) 843 844 here = (top, subdirs, files) 845 846 if topdown: 847 yield here 848 849 for subdir in subdirs: 850 for subitem in walk_v2( 851 _make_full_path(top, subdir), topdown, onerror=onerror): 852 yield subitem 853 854 if not topdown: 855 yield here 856 857 858@tf_export(v1=["gfile.Stat"]) 859def stat(filename): 860 """Returns file statistics for a given path. 861 862 Args: 863 filename: string, path to a file 864 865 Returns: 866 FileStatistics struct that contains information about the path 867 868 Raises: 869 errors.OpError: If the operation fails. 870 """ 871 return stat_v2(filename) 872 873 874@tf_export("io.gfile.stat") 875def stat_v2(path): 876 """Returns file statistics for a given path. 877 878 Args: 879 path: string, path to a file 880 881 Returns: 882 FileStatistics struct that contains information about the path 883 884 Raises: 885 errors.OpError: If the operation fails. 886 """ 887 return _pywrap_file_io.Stat(compat.path_to_str(path)) 888 889 890def filecmp(filename_a, filename_b): 891 """Compare two files, returning True if they are the same, False otherwise. 892 893 We check size first and return False quickly if the files are different sizes. 894 If they are the same size, we continue to generating a crc for the whole file. 895 896 You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is 897 that the builtin library is not robust to the many different filesystems 898 TensorFlow runs on, and so we here perform a similar comparison with 899 the more robust FileIO. 900 901 Args: 902 filename_a: string path to the first file. 903 filename_b: string path to the second file. 904 905 Returns: 906 True if the files are the same, False otherwise. 907 """ 908 size_a = FileIO(filename_a, "rb").size() 909 size_b = FileIO(filename_b, "rb").size() 910 if size_a != size_b: 911 return False 912 913 # Size is the same. Do a full check. 914 crc_a = file_crc32(filename_a) 915 crc_b = file_crc32(filename_b) 916 return crc_a == crc_b 917 918 919def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE): 920 """Get the crc32 of the passed file. 921 922 The crc32 of a file can be used for error checking; two files with the same 923 crc32 are considered equivalent. Note that the entire file must be read 924 to produce the crc32. 925 926 Args: 927 filename: string, path to a file 928 block_size: Integer, process the files by reading blocks of `block_size` 929 bytes. Use -1 to read the file as once. 930 931 Returns: 932 hexadecimal as string, the crc32 of the passed file. 933 """ 934 crc = 0 935 with FileIO(filename, mode="rb") as f: 936 chunk = f.read(n=block_size) 937 while chunk: 938 crc = binascii.crc32(chunk, crc) 939 chunk = f.read(n=block_size) 940 return hex(crc & 0xFFFFFFFF) 941