1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 18 19 #include <stdint.h> 20 21 #include <functional> 22 #include <string> 23 #include <unordered_map> 24 #include <utility> 25 #include <vector> 26 27 #include "tensorflow/core/platform/cord.h" 28 #include "tensorflow/core/platform/errors.h" 29 #include "tensorflow/core/platform/file_statistics.h" 30 #include "tensorflow/core/platform/macros.h" 31 #include "tensorflow/core/platform/platform.h" 32 #include "tensorflow/core/platform/stringpiece.h" 33 #include "tensorflow/core/platform/types.h" 34 35 #ifdef PLATFORM_WINDOWS 36 #undef DeleteFile 37 #undef CopyFile 38 #undef TranslateName 39 #endif 40 41 namespace tensorflow { 42 43 class RandomAccessFile; 44 class ReadOnlyMemoryRegion; 45 class WritableFile; 46 47 class FileSystem; 48 struct TransactionToken { 49 FileSystem* owner; 50 void* token; 51 }; 52 53 /// A generic interface for accessing a file system. Implementations 54 /// of custom filesystem adapters must implement this interface, 55 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. 56 class FileSystem { 57 public: 58 /// \brief Creates a brand new random access read-only file with the 59 /// specified name. 60 /// 61 /// On success, stores a pointer to the new file in 62 /// *result and returns OK. On failure stores NULL in *result and 63 /// returns non-OK. If the file does not exist, returns a non-OK 64 /// status. 65 /// 66 /// The returned file may be concurrently accessed by multiple threads. 67 /// 68 /// The ownership of the returned RandomAccessFile is passed to the caller 69 /// and the object should be deleted when is not used. NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result)70 virtual tensorflow::Status NewRandomAccessFile( 71 const std::string& fname, std::unique_ptr<RandomAccessFile>* result) { 72 return NewRandomAccessFile(fname, nullptr, result); 73 }; 74 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)75 virtual tensorflow::Status NewRandomAccessFile( 76 const std::string& fname, TransactionToken* token, 77 std::unique_ptr<RandomAccessFile>* result) { 78 // We duplicate these methods due to Google internal coding style prevents 79 // virtual functions with default arguments. See PR #41615. 80 return Status::OK(); 81 } 82 83 /// \brief Creates an object that writes to a new file with the specified 84 /// name. 85 /// 86 /// Deletes any existing file with the same name and creates a 87 /// new file. On success, stores a pointer to the new file in 88 /// *result and returns OK. On failure stores NULL in *result and 89 /// returns non-OK. 90 /// 91 /// The returned file will only be accessed by one thread at a time. 92 /// 93 /// The ownership of the returned WritableFile is passed to the caller 94 /// and the object should be deleted when is not used. NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)95 virtual tensorflow::Status NewWritableFile( 96 const std::string& fname, std::unique_ptr<WritableFile>* result) { 97 return NewWritableFile(fname, nullptr, result); 98 }; 99 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)100 virtual tensorflow::Status NewWritableFile( 101 const std::string& fname, TransactionToken* token, 102 std::unique_ptr<WritableFile>* result) { 103 return Status::OK(); 104 } 105 106 /// \brief Creates an object that either appends to an existing file, or 107 /// writes to a new file (if the file does not exist to begin with). 108 /// 109 /// On success, stores a pointer to the new file in *result and 110 /// returns OK. On failure stores NULL in *result and returns 111 /// non-OK. 112 /// 113 /// The returned file will only be accessed by one thread at a time. 114 /// 115 /// The ownership of the returned WritableFile is passed to the caller 116 /// and the object should be deleted when is not used. NewAppendableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)117 virtual tensorflow::Status NewAppendableFile( 118 const std::string& fname, std::unique_ptr<WritableFile>* result) { 119 return NewAppendableFile(fname, nullptr, result); 120 }; 121 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)122 virtual tensorflow::Status NewAppendableFile( 123 const std::string& fname, TransactionToken* token, 124 std::unique_ptr<WritableFile>* result) { 125 return Status::OK(); 126 } 127 128 /// \brief Creates a readonly region of memory with the file context. 129 /// 130 /// On success, it returns a pointer to read-only memory region 131 /// from the content of file fname. The ownership of the region is passed to 132 /// the caller. On failure stores nullptr in *result and returns non-OK. 133 /// 134 /// The returned memory region can be accessed from many threads in parallel. 135 /// 136 /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller 137 /// and the object should be deleted when is not used. NewReadOnlyMemoryRegionFromFile(const std::string & fname,std::unique_ptr<ReadOnlyMemoryRegion> * result)138 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 139 const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) { 140 return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); 141 } 142 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)143 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 144 const std::string& fname, TransactionToken* token, 145 std::unique_ptr<ReadOnlyMemoryRegion>* result) { 146 return Status::OK(); 147 } 148 149 /// Returns OK if the named path exists and NOT_FOUND otherwise. FileExists(const std::string & fname)150 virtual tensorflow::Status FileExists(const std::string& fname) { 151 return FileExists(fname, nullptr); 152 }; 153 FileExists(const std::string & fname,TransactionToken * token)154 virtual tensorflow::Status FileExists(const std::string& fname, 155 TransactionToken* token) { 156 return Status::OK(); 157 } 158 159 /// Returns true if all the listed files exist, false otherwise. 160 /// if status is not null, populate the vector with a detailed status 161 /// for each file. FilesExist(const std::vector<string> & files,std::vector<Status> * status)162 virtual bool FilesExist(const std::vector<string>& files, 163 std::vector<Status>* status) { 164 return FilesExist(files, nullptr, status); 165 } 166 167 virtual bool FilesExist(const std::vector<string>& files, 168 TransactionToken* token, std::vector<Status>* status); 169 170 /// \brief Returns the immediate children in the given directory. 171 /// 172 /// The returned paths are relative to 'dir'. GetChildren(const std::string & dir,std::vector<string> * result)173 virtual tensorflow::Status GetChildren(const std::string& dir, 174 std::vector<string>* result) { 175 return GetChildren(dir, nullptr, result); 176 } 177 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)178 virtual tensorflow::Status GetChildren(const std::string& dir, 179 TransactionToken* token, 180 std::vector<string>* result) { 181 return Status::OK(); 182 } 183 184 /// \brief Given a pattern, stores in *results the set of paths that matches 185 /// that pattern. *results is cleared. 186 /// 187 /// pattern must match all of a name, not just a substring. 188 /// 189 /// pattern: { term } 190 /// term: 191 /// '*': matches any sequence of non-'/' characters 192 /// '?': matches a single non-'/' character 193 /// '[' [ '^' ] { match-list } ']': 194 /// matches any single character (not) on the list 195 /// c: matches character c (c != '*', '?', '\\', '[') 196 /// '\\' c: matches character c 197 /// character-range: 198 /// c: matches character c (c != '\\', '-', ']') 199 /// '\\' c: matches character c 200 /// lo '-' hi: matches character c for lo <= c <= hi 201 /// 202 /// Typical return codes: 203 /// * OK - no errors 204 /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not 205 /// implemented GetMatchingPaths(const std::string & pattern,std::vector<string> * results)206 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 207 std::vector<string>* results) { 208 return GetMatchingPaths(pattern, nullptr, results); 209 } 210 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)211 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 212 TransactionToken* token, 213 std::vector<string>* results) { 214 return Status::OK(); 215 } 216 217 /// \brief Checks if the given filename matches the pattern. 218 /// 219 /// This function provides the equivalent of posix fnmatch, however it is 220 /// implemented without fnmatch to ensure that this can be used for cloud 221 /// filesystems on windows. For windows filesystems, it uses PathMatchSpec. 222 virtual bool Match(const std::string& filename, const std::string& pattern); 223 224 /// \brief Obtains statistics for the given path. Stat(const std::string & fname,FileStatistics * stat)225 virtual tensorflow::Status Stat(const std::string& fname, 226 FileStatistics* stat) { 227 return Stat(fname, nullptr, stat); 228 } 229 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)230 virtual tensorflow::Status Stat(const std::string& fname, 231 TransactionToken* token, 232 FileStatistics* stat) { 233 return Status::OK(); 234 } 235 236 /// \brief Deletes the named file. DeleteFile(const std::string & fname)237 virtual tensorflow::Status DeleteFile(const std::string& fname) { 238 return DeleteFile(fname, nullptr); 239 } 240 DeleteFile(const std::string & fname,TransactionToken * token)241 virtual tensorflow::Status DeleteFile(const std::string& fname, 242 TransactionToken* token) { 243 return Status::OK(); 244 } 245 246 /// \brief Creates the specified directory. 247 /// Typical return codes: 248 /// * OK - successfully created the directory. 249 /// * ALREADY_EXISTS - directory with name dirname already exists. 250 /// * PERMISSION_DENIED - dirname is not writable. CreateDir(const std::string & dirname)251 virtual tensorflow::Status CreateDir(const std::string& dirname) { 252 return CreateDir(dirname, nullptr); 253 } 254 CreateDir(const std::string & dirname,TransactionToken * token)255 virtual tensorflow::Status CreateDir(const std::string& dirname, 256 TransactionToken* token) { 257 return Status::OK(); 258 } 259 260 /// \brief Creates the specified directory and all the necessary 261 /// subdirectories. 262 /// Typical return codes: 263 /// * OK - successfully created the directory and sub directories, even if 264 /// they were already created. 265 /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. RecursivelyCreateDir(const std::string & dirname)266 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) { 267 return RecursivelyCreateDir(dirname, nullptr); 268 } 269 270 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 271 TransactionToken* token); 272 273 /// \brief Deletes the specified directory. DeleteDir(const std::string & dirname)274 virtual tensorflow::Status DeleteDir(const std::string& dirname) { 275 return DeleteDir(dirname, nullptr); 276 }; 277 DeleteDir(const std::string & dirname,TransactionToken * token)278 virtual tensorflow::Status DeleteDir(const std::string& dirname, 279 TransactionToken* token) { 280 return Status::OK(); 281 } 282 283 /// \brief Deletes the specified directory and all subdirectories and files 284 /// underneath it. This is accomplished by traversing the directory tree 285 /// rooted at dirname and deleting entries as they are encountered. 286 /// 287 /// If dirname itself is not readable or does not exist, *undeleted_dir_count 288 /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status 289 /// (e.g. NOT_FOUND) is returned. 290 /// 291 /// If dirname and all its descendants were successfully deleted, TF_OK is 292 /// returned and both error counters are set to zero. 293 /// 294 /// Otherwise, while traversing the tree, undeleted_file_count and 295 /// undeleted_dir_count are updated if an entry of the corresponding type 296 /// could not be deleted. The returned error status represents the reason that 297 /// any one of these entries could not be deleted. 298 /// 299 /// REQUIRES: undeleted_files, undeleted_dirs to be not null. 300 /// 301 /// Typical return codes: 302 /// * OK - dirname exists and we were able to delete everything underneath. 303 /// * NOT_FOUND - dirname doesn't exist 304 /// * PERMISSION_DENIED - dirname or some descendant is not writable 305 /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not 306 /// implemented DeleteRecursively(const std::string & dirname,int64 * undeleted_files,int64 * undeleted_dirs)307 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 308 int64* undeleted_files, 309 int64* undeleted_dirs) { 310 return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); 311 } 312 313 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 314 TransactionToken* token, 315 int64* undeleted_files, 316 int64* undeleted_dirs); 317 318 /// \brief Stores the size of `fname` in `*file_size`. GetFileSize(const std::string & fname,uint64 * file_size)319 virtual tensorflow::Status GetFileSize(const std::string& fname, 320 uint64* file_size) { 321 return GetFileSize(fname, nullptr, file_size); 322 } 323 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)324 virtual tensorflow::Status GetFileSize(const std::string& fname, 325 TransactionToken* token, 326 uint64* file_size) { 327 return Status::OK(); 328 } 329 330 /// \brief Overwrites the target if it exists. RenameFile(const std::string & src,const std::string & target)331 virtual tensorflow::Status RenameFile(const std::string& src, 332 const std::string& target) { 333 return RenameFile(src, target, nullptr); 334 } 335 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)336 virtual tensorflow::Status RenameFile(const std::string& src, 337 const std::string& target, 338 TransactionToken* token) { 339 return Status::OK(); 340 } 341 342 /// \brief Copy the src to target. CopyFile(const std::string & src,const std::string & target)343 virtual tensorflow::Status CopyFile(const std::string& src, 344 const std::string& target) { 345 return CopyFile(src, target, nullptr); 346 } 347 348 virtual tensorflow::Status CopyFile(const std::string& src, 349 const std::string& target, 350 TransactionToken* token); 351 352 /// \brief Translate an URI to a filename for the FileSystem implementation. 353 /// 354 /// The implementation in this class cleans up the path, removing 355 /// duplicate /'s, resolving .. and removing trailing '/'. 356 /// This respects relative vs. absolute paths, but does not 357 /// invoke any system calls (getcwd(2)) in order to resolve relative 358 /// paths with respect to the actual working directory. That is, this is 359 /// purely string manipulation, completely independent of process state. 360 virtual std::string TranslateName(const std::string& name) const; 361 362 /// \brief Returns whether the given path is a directory or not. 363 /// 364 /// Typical return codes (not guaranteed exhaustive): 365 /// * OK - The path exists and is a directory. 366 /// * FAILED_PRECONDITION - The path exists and is not a directory. 367 /// * NOT_FOUND - The path entry does not exist. 368 /// * PERMISSION_DENIED - Insufficient permissions. 369 /// * UNIMPLEMENTED - The file factory doesn't support directories. IsDirectory(const std::string & fname)370 virtual tensorflow::Status IsDirectory(const std::string& fname) { 371 return IsDirectory(fname, nullptr); 372 } 373 374 virtual tensorflow::Status IsDirectory(const std::string& fname, 375 TransactionToken* token); 376 377 /// \brief Returns whether the given path is on a file system 378 /// that has atomic move capabilities. This can be used 379 /// to determine if there needs to be a temp location to safely write objects. 380 /// The second boolean argument has_atomic_move contains this information. 381 /// 382 /// Returns one of the following status codes (not guaranteed exhaustive): 383 /// * OK - The path is on a recognized file system, 384 /// so has_atomic_move holds the above information. 385 /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in 386 /// TF 387 virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); 388 389 /// \brief Flushes any cached filesystem objects from memory. FlushCaches()390 virtual void FlushCaches() { FlushCaches(nullptr); } 391 392 virtual void FlushCaches(TransactionToken* token); 393 394 /// \brief The separator this filesystem uses. 395 /// 396 /// This is implemented as a part of the filesystem, because even on windows, 397 /// a user may need access to filesystems with '/' separators, such as cloud 398 /// filesystems. 399 virtual char Separator() const; 400 401 /// \brief Split a path to its basename and dirname. 402 /// 403 /// Helper function for Basename and Dirname. 404 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const; 405 406 /// \brief returns the final file name in the given path. 407 /// 408 /// Returns the part of the path after the final "/". If there is no 409 /// "/" in the path, the result is the same as the input. 410 virtual StringPiece Basename(StringPiece path) const; 411 412 /// \brief Returns the part of the path before the final "/". 413 /// 414 /// If there is a single leading "/" in the path, the result will be the 415 /// leading "/". If there is no "/" in the path, the result is the empty 416 /// prefix of the input. 417 StringPiece Dirname(StringPiece path) const; 418 419 /// \brief Returns the part of the basename of path after the final ".". 420 /// 421 /// If there is no "." in the basename, the result is empty. 422 StringPiece Extension(StringPiece path) const; 423 424 /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".". 425 /// 426 /// NOTE: This respects relative vs. absolute paths, but does not 427 /// invoke any system calls (getcwd(2)) in order to resolve relative 428 /// paths with respect to the actual working directory. That is, this is 429 /// purely string manipulation, completely independent of process state. 430 std::string CleanPath(StringPiece path) const; 431 432 /// \brief Creates a URI from a scheme, host, and path. 433 /// 434 /// If the scheme is empty, we just return the path. 435 std::string CreateURI(StringPiece scheme, StringPiece host, 436 StringPiece path) const; 437 438 /// \brief Creates a temporary file name with an extension. 439 std::string GetTempFilename(const std::string& extension) const; 440 441 /// \brief Return true if path is absolute. 442 bool IsAbsolutePath(tensorflow::StringPiece path) const; 443 444 #ifndef SWIG // variadic templates 445 /// \brief Join multiple paths together. 446 /// 447 /// This function also removes the unnecessary path separators. 448 /// For example: 449 /// 450 /// Arguments | JoinPath 451 /// ---------------------------+---------- 452 /// '/foo', 'bar' | /foo/bar 453 /// '/foo/', 'bar' | /foo/bar 454 /// '/foo', '/bar' | /foo/bar 455 /// 456 /// Usage: 457 /// string path = io::JoinPath("/mydir", filename); 458 /// string path = io::JoinPath(FLAGS_test_srcdir, filename); 459 /// string path = io::JoinPath("/full", "path", "to", "filename"); 460 template <typename... T> JoinPath(const T &...args)461 std::string JoinPath(const T&... args) { 462 return JoinPathImpl({args...}); 463 } 464 #endif /* SWIG */ 465 466 std::string JoinPathImpl( 467 std::initializer_list<tensorflow::StringPiece> paths); 468 469 /// \brief Populates the scheme, host, and path from a URI. 470 /// 471 /// scheme, host, and path are guaranteed by this function to point into the 472 /// contents of uri, even if empty. 473 /// 474 /// Corner cases: 475 /// - If the URI is invalid, scheme and host are set to empty strings and the 476 /// passed string is assumed to be a path 477 /// - If the URI omits the path (e.g. file://host), then the path is left 478 /// empty. 479 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, 480 StringPiece* path) const; 481 482 // Transaction related API 483 484 /// \brief Starts a new transaction StartTransaction(TransactionToken ** token)485 virtual tensorflow::Status StartTransaction(TransactionToken** token) { 486 *token = nullptr; 487 return Status::OK(); 488 } 489 490 /// \brief Adds `path` to transaction in `token` AddToTransaction(const std::string & path,TransactionToken * token)491 virtual tensorflow::Status AddToTransaction(const std::string& path, 492 TransactionToken* token) { 493 return Status::OK(); 494 } 495 496 /// \brief Ends transaction EndTransaction(TransactionToken * token)497 virtual tensorflow::Status EndTransaction(TransactionToken* token) { 498 return Status::OK(); 499 } 500 501 /// \brief Get token for `path` or start a new transaction and add `path` to 502 /// it. GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)503 virtual tensorflow::Status GetTokenOrStartTransaction( 504 const std::string& path, TransactionToken** token) { 505 *token = nullptr; 506 return Status::OK(); 507 } 508 509 /// \brief Return transaction for `path` or nullptr in `token` GetTransactionForPath(const std::string & path,TransactionToken ** token)510 virtual tensorflow::Status GetTransactionForPath(const std::string& path, 511 TransactionToken** token) { 512 *token = nullptr; 513 return Status::OK(); 514 } 515 516 /// \brief Decode transaction to human readable string. 517 virtual std::string DecodeTransaction(const TransactionToken* token); 518 FileSystem()519 FileSystem() {} 520 521 virtual ~FileSystem() = default; 522 }; 523 /// This macro adds forwarding methods from FileSystem class to 524 /// used class since name hiding will prevent these to be accessed from 525 /// derived classes and would require all use locations to migrate to 526 /// Transactional API. This is an interim solution until ModularFileSystem class 527 /// becomes a singleton. 528 // TODO(sami): Remove this macro when filesystem plugins migration is complete. 529 #define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ 530 using FileSystem::NewRandomAccessFile; \ 531 using FileSystem::NewWritableFile; \ 532 using FileSystem::NewAppendableFile; \ 533 using FileSystem::NewReadOnlyMemoryRegionFromFile; \ 534 using FileSystem::FileExists; \ 535 using FileSystem::GetChildren; \ 536 using FileSystem::GetMatchingPaths; \ 537 using FileSystem::Stat; \ 538 using FileSystem::DeleteFile; \ 539 using FileSystem::RecursivelyCreateDir; \ 540 using FileSystem::DeleteDir; \ 541 using FileSystem::DeleteRecursively; \ 542 using FileSystem::GetFileSize; \ 543 using FileSystem::RenameFile; \ 544 using FileSystem::CopyFile; \ 545 using FileSystem::IsDirectory; \ 546 using FileSystem::FlushCaches 547 548 /// A Wrapper class for Transactional FileSystem support. 549 /// This provides means to make use of the transactions with minimal code change 550 /// Any operations that are done through this interface will be through the 551 /// transaction created at the time of construction of this instance. 552 /// See FileSystem documentation for method descriptions. 553 /// This class simply forwards all calls to wrapped filesystem either with given 554 /// transaction token or with token used in its construction. This allows doing 555 /// transactional filesystem access with minimal code change. 556 class WrappedFileSystem : public FileSystem { 557 public: 558 TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; 559 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)560 tensorflow::Status NewRandomAccessFile( 561 const std::string& fname, TransactionToken* token, 562 std::unique_ptr<RandomAccessFile>* result) override { 563 return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); 564 } 565 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)566 tensorflow::Status NewWritableFile( 567 const std::string& fname, TransactionToken* token, 568 std::unique_ptr<WritableFile>* result) override { 569 return fs_->NewWritableFile(fname, (token ? token : token_), result); 570 } 571 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)572 tensorflow::Status NewAppendableFile( 573 const std::string& fname, TransactionToken* token, 574 std::unique_ptr<WritableFile>* result) override { 575 return fs_->NewAppendableFile(fname, (token ? token : token_), result); 576 } 577 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)578 tensorflow::Status NewReadOnlyMemoryRegionFromFile( 579 const std::string& fname, TransactionToken* token, 580 std::unique_ptr<ReadOnlyMemoryRegion>* result) override { 581 return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), 582 result); 583 } 584 FileExists(const std::string & fname,TransactionToken * token)585 tensorflow::Status FileExists(const std::string& fname, 586 TransactionToken* token) override { 587 return fs_->FileExists(fname, (token ? token : token_)); 588 } 589 FilesExist(const std::vector<string> & files,TransactionToken * token,std::vector<Status> * status)590 bool FilesExist(const std::vector<string>& files, TransactionToken* token, 591 std::vector<Status>* status) override { 592 return fs_->FilesExist(files, (token ? token : token_), status); 593 } 594 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)595 tensorflow::Status GetChildren(const std::string& dir, 596 TransactionToken* token, 597 std::vector<string>* result) override { 598 return fs_->GetChildren(dir, (token ? token : token_), result); 599 } 600 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)601 tensorflow::Status GetMatchingPaths(const std::string& pattern, 602 TransactionToken* token, 603 std::vector<string>* results) override { 604 return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); 605 } 606 Match(const std::string & filename,const std::string & pattern)607 bool Match(const std::string& filename, const std::string& pattern) override { 608 return fs_->Match(filename, pattern); 609 } 610 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)611 tensorflow::Status Stat(const std::string& fname, TransactionToken* token, 612 FileStatistics* stat) override { 613 return fs_->Stat(fname, (token ? token : token_), stat); 614 } 615 DeleteFile(const std::string & fname,TransactionToken * token)616 tensorflow::Status DeleteFile(const std::string& fname, 617 TransactionToken* token) override { 618 return fs_->DeleteFile(fname, (token ? token : token_)); 619 } 620 CreateDir(const std::string & dirname,TransactionToken * token)621 tensorflow::Status CreateDir(const std::string& dirname, 622 TransactionToken* token) override { 623 return fs_->CreateDir(dirname, (token ? token : token_)); 624 } 625 RecursivelyCreateDir(const std::string & dirname,TransactionToken * token)626 tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 627 TransactionToken* token) override { 628 return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); 629 } 630 DeleteDir(const std::string & dirname,TransactionToken * token)631 tensorflow::Status DeleteDir(const std::string& dirname, 632 TransactionToken* token) override { 633 return fs_->DeleteDir(dirname, (token ? token : token_)); 634 } 635 DeleteRecursively(const std::string & dirname,TransactionToken * token,int64 * undeleted_files,int64 * undeleted_dirs)636 tensorflow::Status DeleteRecursively(const std::string& dirname, 637 TransactionToken* token, 638 int64* undeleted_files, 639 int64* undeleted_dirs) override { 640 return fs_->DeleteRecursively(dirname, (token ? token : token_), 641 undeleted_files, undeleted_dirs); 642 } 643 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)644 tensorflow::Status GetFileSize(const std::string& fname, 645 TransactionToken* token, 646 uint64* file_size) override { 647 return fs_->GetFileSize(fname, (token ? token : token_), file_size); 648 } 649 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)650 tensorflow::Status RenameFile(const std::string& src, 651 const std::string& target, 652 TransactionToken* token) override { 653 return fs_->RenameFile(src, target, (token ? token : token_)); 654 } 655 CopyFile(const std::string & src,const std::string & target,TransactionToken * token)656 tensorflow::Status CopyFile(const std::string& src, const std::string& target, 657 TransactionToken* token) override { 658 return fs_->CopyFile(src, target, (token ? token : token_)); 659 } 660 TranslateName(const std::string & name)661 std::string TranslateName(const std::string& name) const override { 662 return fs_->TranslateName(name); 663 } 664 IsDirectory(const std::string & fname,TransactionToken * token)665 tensorflow::Status IsDirectory(const std::string& fname, 666 TransactionToken* token) override { 667 return fs_->IsDirectory(fname, (token ? token : token_)); 668 } 669 HasAtomicMove(const std::string & path,bool * has_atomic_move)670 Status HasAtomicMove(const std::string& path, 671 bool* has_atomic_move) override { 672 return fs_->HasAtomicMove(path, has_atomic_move); 673 } 674 FlushCaches(TransactionToken * token)675 void FlushCaches(TransactionToken* token) override { 676 return fs_->FlushCaches((token ? token : token_)); 677 } 678 Separator()679 char Separator() const override { return fs_->Separator(); } 680 Basename(StringPiece path)681 StringPiece Basename(StringPiece path) const override { 682 return fs_->Basename(path); 683 } 684 StartTransaction(TransactionToken ** token)685 tensorflow::Status StartTransaction(TransactionToken** token) override { 686 return fs_->StartTransaction(token); 687 } 688 AddToTransaction(const std::string & path,TransactionToken * token)689 tensorflow::Status AddToTransaction(const std::string& path, 690 TransactionToken* token) override { 691 return fs_->AddToTransaction(path, (token ? token : token_)); 692 } 693 EndTransaction(TransactionToken * token)694 tensorflow::Status EndTransaction(TransactionToken* token) override { 695 return fs_->EndTransaction(token); 696 } 697 GetTransactionForPath(const std::string & path,TransactionToken ** token)698 tensorflow::Status GetTransactionForPath(const std::string& path, 699 TransactionToken** token) override { 700 return fs_->GetTransactionForPath(path, token); 701 } 702 GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)703 tensorflow::Status GetTokenOrStartTransaction( 704 const std::string& path, TransactionToken** token) override { 705 return fs_->GetTokenOrStartTransaction(path, token); 706 } 707 DecodeTransaction(const TransactionToken * token)708 std::string DecodeTransaction(const TransactionToken* token) override { 709 return fs_->DecodeTransaction((token ? token : token_)); 710 } 711 WrappedFileSystem(FileSystem * file_system,TransactionToken * token)712 WrappedFileSystem(FileSystem* file_system, TransactionToken* token) 713 : fs_(file_system), token_(token) {} 714 715 ~WrappedFileSystem() override = default; 716 717 private: 718 FileSystem* fs_; 719 TransactionToken* token_; 720 }; 721 722 /// A file abstraction for randomly reading the contents of a file. 723 class RandomAccessFile { 724 public: RandomAccessFile()725 RandomAccessFile() {} 726 virtual ~RandomAccessFile() = default; 727 728 /// \brief Returns the name of the file. 729 /// 730 /// This is an optional operation that may not be implemented by every 731 /// filesystem. Name(StringPiece * result)732 virtual tensorflow::Status Name(StringPiece* result) const { 733 return errors::Unimplemented("This filesystem does not support Name()"); 734 } 735 736 /// \brief Reads up to `n` bytes from the file starting at `offset`. 737 /// 738 /// `scratch[0..n-1]` may be written by this routine. Sets `*result` 739 /// to the data that was read (including if fewer than `n` bytes were 740 /// successfully read). May set `*result` to point at data in 741 /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when 742 /// `*result` is used. 743 /// 744 /// On OK returned status: `n` bytes have been stored in `*result`. 745 /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. 746 /// 747 /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` 748 /// because of EOF. 749 /// 750 /// Safe for concurrent use by multiple threads. 751 virtual tensorflow::Status Read(uint64 offset, size_t n, StringPiece* result, 752 char* scratch) const = 0; 753 754 #if defined(TF_CORD_SUPPORT) 755 /// \brief Read up to `n` bytes from the file starting at `offset`. Read(uint64 offset,size_t n,absl::Cord * cord)756 virtual tensorflow::Status Read(uint64 offset, size_t n, 757 absl::Cord* cord) const { 758 return errors::Unimplemented( 759 "Read(uint64, size_t, absl::Cord*) is not " 760 "implemented"); 761 } 762 #endif 763 764 private: 765 TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); 766 }; 767 768 /// \brief A file abstraction for sequential writing. 769 /// 770 /// The implementation must provide buffering since callers may append 771 /// small fragments at a time to the file. 772 class WritableFile { 773 public: WritableFile()774 WritableFile() {} 775 virtual ~WritableFile() = default; 776 777 /// \brief Append 'data' to the file. 778 virtual tensorflow::Status Append(StringPiece data) = 0; 779 780 #if defined(TF_CORD_SUPPORT) 781 // \brief Append 'data' to the file. Append(const absl::Cord & cord)782 virtual tensorflow::Status Append(const absl::Cord& cord) { 783 return errors::Unimplemented("Append(absl::Cord) is not implemented"); 784 } 785 #endif 786 787 /// \brief Close the file. 788 /// 789 /// Flush() and de-allocate resources associated with this file 790 /// 791 /// Typical return codes (not guaranteed to be exhaustive): 792 /// * OK 793 /// * Other codes, as returned from Flush() 794 virtual tensorflow::Status Close() = 0; 795 796 /// \brief Flushes the file and optionally syncs contents to filesystem. 797 /// 798 /// This should flush any local buffers whose contents have not been 799 /// delivered to the filesystem. 800 /// 801 /// If the process terminates after a successful flush, the contents 802 /// may still be persisted, since the underlying filesystem may 803 /// eventually flush the contents. If the OS or machine crashes 804 /// after a successful flush, the contents may or may not be 805 /// persisted, depending on the implementation. 806 virtual tensorflow::Status Flush() = 0; 807 808 // \brief Returns the name of the file. 809 /// 810 /// This is an optional operation that may not be implemented by every 811 /// filesystem. Name(StringPiece * result)812 virtual tensorflow::Status Name(StringPiece* result) const { 813 return errors::Unimplemented("This filesystem does not support Name()"); 814 } 815 816 /// \brief Syncs contents of file to filesystem. 817 /// 818 /// This waits for confirmation from the filesystem that the contents 819 /// of the file have been persisted to the filesystem; if the OS 820 /// or machine crashes after a successful Sync, the contents should 821 /// be properly saved. 822 virtual tensorflow::Status Sync() = 0; 823 824 /// \brief Retrieves the current write position in the file, or -1 on 825 /// error. 826 /// 827 /// This is an optional operation, subclasses may choose to return 828 /// errors::Unimplemented. Tell(int64 * position)829 virtual tensorflow::Status Tell(int64* position) { 830 *position = -1; 831 return errors::Unimplemented("This filesystem does not support Tell()"); 832 } 833 834 private: 835 TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); 836 }; 837 838 /// \brief A readonly memmapped file abstraction. 839 /// 840 /// The implementation must guarantee that all memory is accessible when the 841 /// object exists, independently from the Env that created it. 842 class ReadOnlyMemoryRegion { 843 public: ReadOnlyMemoryRegion()844 ReadOnlyMemoryRegion() {} 845 virtual ~ReadOnlyMemoryRegion() = default; 846 847 /// \brief Returns a pointer to the memory region. 848 virtual const void* data() = 0; 849 850 /// \brief Returns the length of the memory region in bytes. 851 virtual uint64 length() = 0; 852 }; 853 854 /// \brief A registry for file system implementations. 855 /// 856 /// Filenames are specified as an URI, which is of the form 857 /// [scheme://]<filename>. 858 /// File system implementations are registered using the REGISTER_FILE_SYSTEM 859 /// macro, providing the 'scheme' as the key. 860 /// 861 /// There are two `Register` methods: one using `Factory` for legacy filesystems 862 /// (deprecated mechanism of subclassing `FileSystem` and using 863 /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>` 864 /// for the new modular approach. 865 /// 866 /// Note that the new API expects a pointer to `ModularFileSystem` but this is 867 /// not checked as there should be exactly one caller to the API and doing the 868 /// check results in a circular dependency between `BUILD` targets. 869 /// 870 /// Plan is to completely remove the filesystem registration from `Env` and 871 /// incorporate it into `ModularFileSystem` class (which will be renamed to be 872 /// the only `FileSystem` class and marked as `final`). But this will happen at 873 /// a later time, after we convert all filesystems to the new API. 874 /// 875 /// TODO(mihaimaruseac): After all filesystems are converted, remove old 876 /// registration and update comment. 877 class FileSystemRegistry { 878 public: 879 typedef std::function<FileSystem*()> Factory; 880 881 virtual ~FileSystemRegistry() = default; 882 virtual tensorflow::Status Register(const std::string& scheme, 883 Factory factory) = 0; 884 virtual tensorflow::Status Register( 885 const std::string& scheme, std::unique_ptr<FileSystem> filesystem) = 0; 886 virtual FileSystem* Lookup(const std::string& scheme) = 0; 887 virtual tensorflow::Status GetRegisteredFileSystemSchemes( 888 std::vector<std::string>* schemes) = 0; 889 }; 890 891 } // namespace tensorflow 892 893 #endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 894