• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
18 
19 #include <stdint.h>
20 #include <functional>
21 #include <string>
22 #include <unordered_map>
23 #include <vector>
24 #include "tensorflow/core/lib/core/errors.h"
25 #include "tensorflow/core/lib/core/status.h"
26 #include "tensorflow/core/lib/core/stringpiece.h"
27 #include "tensorflow/core/platform/cord.h"
28 #include "tensorflow/core/platform/file_statistics.h"
29 #include "tensorflow/core/platform/macros.h"
30 #include "tensorflow/core/platform/platform.h"
31 #include "tensorflow/core/platform/types.h"
32 
33 #ifdef PLATFORM_WINDOWS
34 #undef DeleteFile
35 #endif
36 
37 namespace tensorflow {
38 
39 class RandomAccessFile;
40 class ReadOnlyMemoryRegion;
41 class WritableFile;
42 
43 /// A generic interface for accessing a file system.  Implementations
44 /// of custom filesystem adapters must implement this interface,
45 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes.
46 class FileSystem {
47  public:
48   /// \brief Creates a brand new random access read-only file with the
49   /// specified name.
50   ///
51   /// On success, stores a pointer to the new file in
52   /// *result and returns OK.  On failure stores NULL in *result and
53   /// returns non-OK.  If the file does not exist, returns a non-OK
54   /// status.
55   ///
56   /// The returned file may be concurrently accessed by multiple threads.
57   ///
58   /// The ownership of the returned RandomAccessFile is passed to the caller
59   /// and the object should be deleted when is not used.
60   virtual Status NewRandomAccessFile(
61       const string& fname, std::unique_ptr<RandomAccessFile>* result) = 0;
62 
63   /// \brief Creates an object that writes to a new file with the specified
64   /// name.
65   ///
66   /// Deletes any existing file with the same name and creates a
67   /// new file.  On success, stores a pointer to the new file in
68   /// *result and returns OK.  On failure stores NULL in *result and
69   /// returns non-OK.
70   ///
71   /// The returned file will only be accessed by one thread at a time.
72   ///
73   /// The ownership of the returned WritableFile is passed to the caller
74   /// and the object should be deleted when is not used.
75   virtual Status NewWritableFile(const string& fname,
76                                  std::unique_ptr<WritableFile>* result) = 0;
77 
78   /// \brief Creates an object that either appends to an existing file, or
79   /// writes to a new file (if the file does not exist to begin with).
80   ///
81   /// On success, stores a pointer to the new file in *result and
82   /// returns OK.  On failure stores NULL in *result and returns
83   /// non-OK.
84   ///
85   /// The returned file will only be accessed by one thread at a time.
86   ///
87   /// The ownership of the returned WritableFile is passed to the caller
88   /// and the object should be deleted when is not used.
89   virtual Status NewAppendableFile(const string& fname,
90                                    std::unique_ptr<WritableFile>* result) = 0;
91 
92   /// \brief Creates a readonly region of memory with the file context.
93   ///
94   /// On success, it returns a pointer to read-only memory region
95   /// from the content of file fname. The ownership of the region is passed to
96   /// the caller. On failure stores nullptr in *result and returns non-OK.
97   ///
98   /// The returned memory region can be accessed from many threads in parallel.
99   ///
100   /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller
101   /// and the object should be deleted when is not used.
102   virtual Status NewReadOnlyMemoryRegionFromFile(
103       const string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) = 0;
104 
105   /// Returns OK if the named path exists and NOT_FOUND otherwise.
106   virtual Status FileExists(const string& fname) = 0;
107 
108   /// Returns true if all the listed files exist, false otherwise.
109   /// if status is not null, populate the vector with a detailed status
110   /// for each file.
111   virtual bool FilesExist(const std::vector<string>& files,
112                           std::vector<Status>* status);
113 
114   /// \brief Returns the immediate children in the given directory.
115   ///
116   /// The returned paths are relative to 'dir'.
117   virtual Status GetChildren(const string& dir,
118                              std::vector<string>* result) = 0;
119 
120   /// \brief Given a pattern, stores in *results the set of paths that matches
121   /// that pattern. *results is cleared.
122   ///
123   /// pattern must match all of a name, not just a substring.
124   ///
125   /// pattern: { term }
126   /// term:
127   ///   '*': matches any sequence of non-'/' characters
128   ///   '?': matches a single non-'/' character
129   ///   '[' [ '^' ] { match-list } ']':
130   ///        matches any single character (not) on the list
131   ///   c: matches character c (c != '*', '?', '\\', '[')
132   ///   '\\' c: matches character c
133   /// character-range:
134   ///   c: matches character c (c != '\\', '-', ']')
135   ///   '\\' c: matches character c
136   ///   lo '-' hi: matches character c for lo <= c <= hi
137   ///
138   /// Typical return codes:
139   ///  * OK - no errors
140   ///  * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not
141   ///                    implemented
142   virtual Status GetMatchingPaths(const string& pattern,
143                                   std::vector<string>* results) = 0;
144 
145   /// \brief Obtains statistics for the given path.
146   virtual Status Stat(const string& fname, FileStatistics* stat) = 0;
147 
148   /// \brief Deletes the named file.
149   virtual Status DeleteFile(const string& fname) = 0;
150 
151   /// \brief Creates the specified directory.
152   /// Typical return codes:
153   ///  * OK - successfully created the directory.
154   ///  * ALREADY_EXISTS - directory with name dirname already exists.
155   ///  * PERMISSION_DENIED - dirname is not writable.
156   virtual Status CreateDir(const string& dirname) = 0;
157 
158   /// \brief Creates the specified directory and all the necessary
159   /// subdirectories.
160   /// Typical return codes:
161   ///  * OK - successfully created the directory and sub directories, even if
162   ///         they were already created.
163   ///  * PERMISSION_DENIED - dirname or some subdirectory is not writable.
164   virtual Status RecursivelyCreateDir(const string& dirname);
165 
166   /// \brief Deletes the specified directory.
167   virtual Status DeleteDir(const string& dirname) = 0;
168 
169   /// \brief Deletes the specified directory and all subdirectories and files
170   /// underneath it. This is accomplished by traversing the directory tree
171   /// rooted at dirname and deleting entries as they are encountered.
172   ///
173   /// If dirname itself is not readable or does not exist, *undeleted_dir_count
174   /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status
175   /// (e.g. NOT_FOUND) is returned.
176   ///
177   /// If dirname and all its descendants were successfully deleted, TF_OK is
178   /// returned and both error counters are set to zero.
179   ///
180   /// Otherwise, while traversing the tree, undeleted_file_count and
181   /// undeleted_dir_count are updated if an entry of the corresponding type
182   /// could not be deleted. The returned error status represents the reason that
183   /// any one of these entries could not be deleted.
184   ///
185   /// REQUIRES: undeleted_files, undeleted_dirs to be not null.
186   ///
187   /// Typical return codes:
188   ///  * OK - dirname exists and we were able to delete everything underneath.
189   ///  * NOT_FOUND - dirname doesn't exist
190   ///  * PERMISSION_DENIED - dirname or some descendant is not writable
191   ///  * UNIMPLEMENTED - Some underlying functions (like Delete) are not
192   ///                    implemented
193   virtual Status DeleteRecursively(const string& dirname,
194                                    int64* undeleted_files,
195                                    int64* undeleted_dirs);
196 
197   /// \brief Stores the size of `fname` in `*file_size`.
198   virtual Status GetFileSize(const string& fname, uint64* file_size) = 0;
199 
200   /// \brief Overwrites the target if it exists.
201   virtual Status RenameFile(const string& src, const string& target) = 0;
202 
203   /// \brief Copy the src to target.
204   virtual Status CopyFile(const string& src, const string& target);
205 
206   /// \brief Translate an URI to a filename for the FileSystem implementation.
207   ///
208   /// The implementation in this class cleans up the path, removing
209   /// duplicate /'s, resolving .. and . (more details in
210   /// tensorflow::lib::io::CleanPath).
211   virtual string TranslateName(const string& name) const;
212 
213   /// \brief Returns whether the given path is a directory or not.
214   ///
215   /// Typical return codes (not guaranteed exhaustive):
216   ///  * OK - The path exists and is a directory.
217   ///  * FAILED_PRECONDITION - The path exists and is not a directory.
218   ///  * NOT_FOUND - The path entry does not exist.
219   ///  * PERMISSION_DENIED - Insufficient permissions.
220   ///  * UNIMPLEMENTED - The file factory doesn't support directories.
221   virtual Status IsDirectory(const string& fname);
222 
223   /// \brief Flushes any cached filesystem objects from memory.
224   virtual void FlushCaches();
225 
FileSystem()226   FileSystem() {}
227 
228   virtual ~FileSystem();
229 };
230 
231 /// A file abstraction for randomly reading the contents of a file.
232 class RandomAccessFile {
233  public:
RandomAccessFile()234   RandomAccessFile() {}
235   virtual ~RandomAccessFile();
236 
237   /// \brief Returns the name of the file.
238   ///
239   /// This is an optional operation that may not be implemented by every
240   /// filesystem.
Name(StringPiece * result)241   virtual Status Name(StringPiece* result) const {
242     return errors::Unimplemented("This filesystem does not support Name()");
243   }
244 
245   /// \brief Reads up to `n` bytes from the file starting at `offset`.
246   ///
247   /// `scratch[0..n-1]` may be written by this routine.  Sets `*result`
248   /// to the data that was read (including if fewer than `n` bytes were
249   /// successfully read).  May set `*result` to point at data in
250   /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when
251   /// `*result` is used.
252   ///
253   /// On OK returned status: `n` bytes have been stored in `*result`.
254   /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`.
255   ///
256   /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result`
257   /// because of EOF.
258   ///
259   /// Safe for concurrent use by multiple threads.
260   virtual Status Read(uint64 offset, size_t n, StringPiece* result,
261                       char* scratch) const = 0;
262 
263  private:
264   TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile);
265 };
266 
267 /// \brief A file abstraction for sequential writing.
268 ///
269 /// The implementation must provide buffering since callers may append
270 /// small fragments at a time to the file.
271 class WritableFile {
272  public:
WritableFile()273   WritableFile() {}
274   virtual ~WritableFile();
275 
276   /// \brief Append 'data' to the file.
277   virtual Status Append(StringPiece data) = 0;
278 
279   // TODO(ebrevdo): Remove this ifdef when absl is updated.
280 #if defined(PLATFORM_GOOGLE)
281   // \brief Append 'data' to the file.
Append(const absl::Cord & cord)282   virtual Status Append(const absl::Cord& cord) {
283     return errors::Unimplemented("Append(absl::Cord) is not implemented");
284   }
285 #endif
286 
287   /// \brief Close the file.
288   ///
289   /// Flush() and de-allocate resources associated with this file
290   ///
291   /// Typical return codes (not guaranteed to be exhaustive):
292   ///  * OK
293   ///  * Other codes, as returned from Flush()
294   virtual Status Close() = 0;
295 
296   /// \brief Flushes the file and optionally syncs contents to filesystem.
297   ///
298   /// This should flush any local buffers whose contents have not been
299   /// delivered to the filesystem.
300   ///
301   /// If the process terminates after a successful flush, the contents
302   /// may still be persisted, since the underlying filesystem may
303   /// eventually flush the contents.  If the OS or machine crashes
304   /// after a successful flush, the contents may or may not be
305   /// persisted, depending on the implementation.
306   virtual Status Flush() = 0;
307 
308   // \brief Returns the name of the file.
309   ///
310   /// This is an optional operation that may not be implemented by every
311   /// filesystem.
Name(StringPiece * result)312   virtual Status Name(StringPiece* result) const {
313     return errors::Unimplemented("This filesystem does not support Name()");
314   }
315 
316   /// \brief Syncs contents of file to filesystem.
317   ///
318   /// This waits for confirmation from the filesystem that the contents
319   /// of the file have been persisted to the filesystem; if the OS
320   /// or machine crashes after a successful Sync, the contents should
321   /// be properly saved.
322   virtual Status Sync() = 0;
323 
324   /// \brief Retrieves the current write position in the file, or -1 on
325   /// error.
326   ///
327   /// This is an optional operation, subclasses may choose to return
328   /// errors::Unimplemented.
Tell(int64 * position)329   virtual Status Tell(int64* position) {
330     *position = -1;
331     return errors::Unimplemented("This filesystem does not support Tell()");
332   }
333 
334  private:
335   TF_DISALLOW_COPY_AND_ASSIGN(WritableFile);
336 };
337 
338 /// \brief A readonly memmapped file abstraction.
339 ///
340 /// The implementation must guarantee that all memory is accessible when the
341 /// object exists, independently from the Env that created it.
342 class ReadOnlyMemoryRegion {
343  public:
ReadOnlyMemoryRegion()344   ReadOnlyMemoryRegion() {}
345   virtual ~ReadOnlyMemoryRegion() = default;
346 
347   /// \brief Returns a pointer to the memory region.
348   virtual const void* data() = 0;
349 
350   /// \brief Returns the length of the memory region in bytes.
351   virtual uint64 length() = 0;
352 };
353 
354 /// \brief A registry for file system implementations.
355 ///
356 /// Filenames are specified as an URI, which is of the form
357 /// [scheme://]<filename>.
358 /// File system implementations are registered using the REGISTER_FILE_SYSTEM
359 /// macro, providing the 'scheme' as the key.
360 class FileSystemRegistry {
361  public:
362   typedef std::function<FileSystem*()> Factory;
363 
364   virtual ~FileSystemRegistry();
365   virtual Status Register(const string& scheme, Factory factory) = 0;
366   virtual FileSystem* Lookup(const string& scheme) = 0;
367   virtual Status GetRegisteredFileSystemSchemes(
368       std::vector<string>* schemes) = 0;
369 };
370 
371 }  // namespace tensorflow
372 
373 #endif  // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
374