• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""File IO methods that wrap the C++ FileSystem API."""
16from __future__ import absolute_import
17from __future__ import division
18from __future__ import print_function
19
20import binascii
21import os
22import uuid
23
24import six
25
26from tensorflow.python.framework import errors
27from tensorflow.python.lib.io import _pywrap_file_io
28from tensorflow.python.util import compat
29from tensorflow.python.util import deprecation
30from tensorflow.python.util.tf_export import tf_export
31
32# A good default block size depends on the system in question.
33# A somewhat conservative default chosen here.
34_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024
35
36
class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.

  The default buffer size used for the BufferedInputStream used for reading
  the file line by line is 1024 * 512 bytes.
  """

  def __init__(self, name, mode):
    self.__name = name
    self.__mode = mode
    # The underlying C++ streams are created lazily by _preread_check /
    # _prewrite_check, so constructing a FileIO performs no I/O.
    self._read_buf = None
    self._writable_file = None
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    """Lazily creates the read stream; raises if not opened for reading.

    Raises:
      errors.PermissionDeniedError: If the file was not opened in a
        readable mode.
    """
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      # 1024 * 512 is the buffer size used for line-by-line reading
      # (see the class docstring).
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    """Lazily creates the writable file; raises if not opened for writing.

    Raises:
      errors.PermissionDeniedError: If the file was not opened in a
        writable mode.
    """
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    """Returns `val` as bytes in binary mode, otherwise as a string."""
    if self._binary_mode:
      return compat.as_bytes(val)
    else:
      return compat.as_str_any(val)

  def size(self):
    """Returns the size of the file."""
    # Delegates to the module-level stat(); does not require the file
    # to be open for reading.
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(compat.as_bytes(file_content))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
    """
    self._preread_check()
    if n == -1:
      # Read everything remaining from the current position.
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
    """
    # pylint: enable=g-doc-args
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    # Translate relative whence values into an absolute offset, since the
    # underlying stream only supports absolute seeks.
    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    # Read and write positions are tracked by separate underlying streams,
    # so report whichever one this file was opened for.
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()

      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    """Returns self; iteration yields lines via __next__."""
    return self

  def __next__(self):
    """Returns the next line, raising StopIteration at end of file."""
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    """Python 2 iterator protocol; delegates to __next__."""
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    """Closes FileIO. Should be called for the WritableFile to be flushed."""
    # Dropping the reference releases the read buffer; the writable file
    # must be closed explicitly so buffered writes are flushed.
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True
234
235
@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  """Determines whether a path exists or not.

  Thin v1 alias that forwards to `file_exists_v2`.

  Args:
    filename: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  return file_exists_v2(path=filename)
251
252
@tf_export("io.gfile.exists")
def file_exists_v2(path):
  """Determines whether a path exists or not.

  Args:
    path: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  # Existence is probed by the C++ FileSystem; only a NotFoundError means
  # "does not exist" — every other error propagates to the caller.
  try:
    _pywrap_file_io.FileExists(compat.path_to_bytes(path))
    return True
  except errors.NotFoundError:
    return False
272
273
@tf_export(v1=["gfile.Remove"])
def delete_file(filename):
  """Deletes the file located at 'filename'.

  Thin v1 alias that forwards to `delete_file_v2`.

  Args:
    filename: string, a filename

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
    `NotFoundError` if the file does not exist.
  """
  delete_file_v2(path=filename)
286
287
@tf_export("io.gfile.remove")
def delete_file_v2(path):
  """Deletes the path located at 'path'.

  Args:
    path: string, a path

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
    `NotFoundError` if the path does not exist.
  """
  encoded_path = compat.path_to_bytes(path)
  _pywrap_file_io.DeleteFile(encoded_path)
300
301
def read_file_to_string(filename, binary_mode=False):
  """Reads the entire contents of a file to a string.

  Args:
    filename: string, path to a file
    binary_mode: whether to open the file in binary mode or not. This changes
      the type of the object returned.

  Returns:
    contents of the file as a string or bytes.

  Raises:
    errors.OpError: Raises variety of errors that are subtypes e.g.
    `NotFoundError` etc.
  """
  mode = "rb" if binary_mode else "r"
  # Use a context manager so the underlying read buffer is released as soon
  # as the contents have been read, rather than waiting for garbage
  # collection of the leaked FileIO object.
  with FileIO(filename, mode=mode) as f:
    return f.read()
322
323
def write_string_to_file(filename, file_content):
  """Writes a string to a given file.

  Args:
    filename: string, path to a file
    file_content: string, contents that need to be written to the file

  Raises:
    errors.OpError: If there are errors during the operation.
  """
  # The context manager guarantees the writable file is flushed and closed.
  with FileIO(filename, mode="w") as writer:
    writer.write(file_content)
336
337
@tf_export(v1=["gfile.Glob"])
def get_matching_files(filename):
  """Returns a list of files that match the given pattern(s).

  Thin v1 alias that forwards to `get_matching_files_v2`.

  Args:
    filename: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
  *  errors.OpError: If there are filesystem / directory listing errors.
  *  errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  return get_matching_files_v2(pattern=filename)
353
354
@tf_export("io.gfile.glob")
def get_matching_files_v2(pattern):
  r"""Returns a list of files that match the given pattern(s).

  The patterns are defined as strings. Supported patterns are defined
  here. Note that the pattern can be a Python iteratable of string patterns.

  The format definition of the pattern is:

  **pattern**: `{ term }`

  **term**:
    * `'*'`: matches any sequence of non-'/' characters
    * `'?'`: matches a single non-'/' character
    * `'[' [ '^' ] { match-list } ']'`: matches any single
      character (not) on the list
    * `c`: matches character `c`  where `c != '*', '?', '\\', '['`
    * `'\\' c`: matches character `c`

  **character range**:
    * `c`: matches character `c` while `c != '\\', '-', ']'`
    * `'\\' c`: matches character `c`
    * `lo '-' hi`: matches character `c` for `lo <= c <= hi`

  Examples:

  >>> tf.io.gfile.glob("*.py")
  ... # For example, ['__init__.py']

  >>> tf.io.gfile.glob("__init__.??")
  ... # As above

  >>> files = {"*.py"}
  >>> the_iterator = iter(files)
  >>> tf.io.gfile.glob(the_iterator)
  ... # As above

  See the C++ function `GetMatchingPaths` in
  [`core/platform/file_system.h`]
  (../../../core/platform/file_system.h)
  for implementation details.

  Args:
    pattern: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  # Normalize the single-string case to a one-element list so both input
  # forms take the same code path below.
  if isinstance(pattern, six.string_types):
    patterns = [pattern]
  else:
    patterns = pattern
  matches = []
  for single_pattern in patterns:
    for match in _pywrap_file_io.GetMatchingFiles(
        compat.as_bytes(single_pattern)):
      # The C++ API returns filenames as bytes; expose them as strings.
      matches.append(compat.as_str_any(match))
  return matches
422
423
@tf_export(v1=["gfile.MkDir"])
def create_dir(dirname):
  """Creates a directory with the name `dirname`.

  Thin v1 alias that forwards to `create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  create_dir_v2(path=dirname)
438
439
@tf_export("io.gfile.mkdir")
def create_dir_v2(path):
  """Creates a directory with the name given by `path`.

  Args:
    path: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  encoded_path = compat.path_to_bytes(path)
  _pywrap_file_io.CreateDir(encoded_path)
454
455
@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if dirname already exists and is writable. Thin v1 alias that
  forwards to `recursive_create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(path=dirname)
469
470
@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if path already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  encoded_path = compat.path_to_bytes(path)
  _pywrap_file_io.RecursivelyCreateDir(encoded_path)
484
485
@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  """Copies data from `oldpath` to `newpath`.

  Thin v1 alias that forwards to `copy_v2`.

  Args:
    oldpath: string, name of the file who's contents need to be copied
    newpath: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `newpath` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  copy_v2(src=oldpath, dst=newpath, overwrite=overwrite)
500
501
@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  src_bytes = compat.path_to_bytes(src)
  dst_bytes = compat.path_to_bytes(dst)
  _pywrap_file_io.CopyFile(src_bytes, dst_bytes, overwrite)
517
518
@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Rename or move a file / directory.

  Thin v1 alias that forwards to `rename_v2`.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(src=oldname, dst=newname, overwrite=overwrite)
533
534
@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Rename or move a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  src_bytes = compat.path_to_bytes(src)
  dst_bytes = compat.path_to_bytes(dst)
  _pywrap_file_io.RenameFile(src_bytes, dst_bytes, overwrite)
550
551
def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes to `filename` atomically.

  This means that when `filename` appears in the filesystem, it will contain
  all of `contents`. With write_string_to_file, it is possible for the file
  to appear in the filesystem with `contents` only partially written.

  Accomplished by writing to a temp file and then renaming it.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
  """
  # Filesystems without atomic rename get a plain (non-atomic) write.
  if not has_atomic_move(filename):
    write_string_to_file(filename, contents)
    return
  # Write to a uniquely named temp file, then move it into place.
  temp_pathname = filename + ".tmp" + uuid.uuid4().hex
  write_string_to_file(temp_pathname, contents)
  try:
    rename(temp_pathname, filename, overwrite)
  except errors.OpError:
    # Don't leave the temp file behind if the rename fails.
    delete_file(temp_pathname)
    raise
577
578
@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Deletes everything under dirname recursively.

  Thin v1 alias that forwards to `delete_recursively_v2`.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(path=dirname)
590
591
@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Deletes everything under path recursively.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  encoded_path = compat.path_to_bytes(path)
  _pywrap_file_io.DeleteRecursively(encoded_path)
603
604
@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Returns whether the path is a directory or not.

  Thin v1 alias that forwards to `is_directory_v2`.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(path=dirname)
616
617
@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Returns whether the path is a directory or not.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  # Any filesystem error (not just NotFoundError) is treated as
  # "not a directory" rather than propagated.
  try:
    result = _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    return False
  return result
632
633
def has_atomic_move(path):
  """Checks whether the file system supports atomic moves.

  Returns whether or not the file system of the given path supports the atomic
  move operation for a file or folder.  If atomic move is supported, it is
  recommended to use a temp location for writing and then move to the final
  location.

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move
    False, if the file system does not support atomic move. In such cases
           we need to be careful about using moves. In some cases it is safer
           not to use temporary locations in this case.
  """
  try:
    supports_move = _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # Filesystems that cannot answer the query are assumed to support
    # atomic moves (defaults to True).
    return True
  return supports_move
656
657
@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..". Thin v1 alias that forwards to `list_directory_v2`.

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  return list_directory_v2(path=dirname)
675
676
@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))

  children = _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  # The C++ API returns a vector of bytes; callers expect strings.
  return [compat.as_str_any(child) for child in children]
705
706
@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  Thin v1 alias that forwards to `walk_v2`.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False.  Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple:  the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, topdown=in_order)
723
724
@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function, it will be
      called with the error as argument. Rethrowing the error aborts the walk.
      Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple:  the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """

  def _make_full_path(parent, item):
    # Since `os.path.join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#os.path.join),
    # we have to manually handle that case as `/` is a valid character on GCS.
    if item[0] == os.sep:
      return "".join([os.path.join(parent, ""), item])
    return os.path.join(parent, item)

  top = compat.as_str_any(compat.path_to_str(top))
  try:
    listing = list_directory(top)
  except errors.NotFoundError as err:
    if onerror:
      onerror(err)
    # Bug fix: previously, when `onerror` was supplied and did not re-raise,
    # control fell through to the loop below with `listing` unbound and
    # raised a NameError. Whether or not an error handler ran, this
    # directory cannot be listed, so stop this level of the walk (matching
    # `os.walk` semantics for its `onerror` callback).
    return

  # Partition the listing into subdirectories and leaf files.
  files = []
  subdirs = []
  for item in listing:
    full_path = _make_full_path(top, item)
    if is_directory(full_path):
      subdirs.append(item)
    else:
      files.append(item)

  here = (top, subdirs, files)

  if topdown:
    yield here

  for subdir in subdirs:
    for subitem in walk_v2(
        _make_full_path(top, subdir), topdown, onerror=onerror):
      yield subitem

  if not topdown:
    yield here
781
782
@tf_export(v1=["gfile.Stat"])
def stat(filename):
  """Returns file statistics for a given path.

  Thin v1 alias that forwards to `stat_v2`.

  Args:
    filename: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return stat_v2(path=filename)
797
798
@tf_export("io.gfile.stat")
def stat_v2(path):
  """Returns file statistics for a given path.

  Args:
    path: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  str_path = compat.path_to_str(path)
  return _pywrap_file_io.Stat(str_path)
813
814
def filecmp(filename_a, filename_b):
  """Compare two files, returning True if they are the same, False otherwise.

  We check size first and return False quickly if the files are different sizes.
  If they are the same size, we continue to generating a crc for the whole file.

  You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is
  that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, and so we here perform a similar comparison with
  the more robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
  """
  # A size mismatch settles the comparison without reading either file.
  if FileIO(filename_a, "rb").size() != FileIO(filename_b, "rb").size():
    return False

  # Sizes match; compare checksums over the full contents.
  return file_crc32(filename_a) == file_crc32(filename_b)
842
843
def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Get the crc32 of the passed file.

  The crc32 of a file can be used for error checking; two files with the same
  crc32 are considered equivalent. Note that the entire file must be read
  to produce the crc32.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the file as once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    while True:
      chunk = f.read(n=block_size)
      if not chunk:
        break
      # Fold each block into the running checksum.
      crc = binascii.crc32(chunk, crc)
  return hex(crc & 0xFFFFFFFF)
866