1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 18 19 #include <stdint.h> 20 21 #include <functional> 22 #include <string> 23 #include <unordered_map> 24 #include <utility> 25 #include <vector> 26 27 #include "tensorflow/core/platform/cord.h" 28 #include "tensorflow/core/platform/errors.h" 29 #include "tensorflow/core/platform/file_statistics.h" 30 #include "tensorflow/core/platform/macros.h" 31 #include "tensorflow/core/platform/platform.h" 32 #include "tensorflow/core/platform/stringpiece.h" 33 #include "tensorflow/core/platform/types.h" 34 35 #ifdef PLATFORM_WINDOWS 36 #undef DeleteFile 37 #undef CopyFile 38 #undef TranslateName 39 #endif 40 41 namespace tensorflow { 42 43 class RandomAccessFile; 44 class ReadOnlyMemoryRegion; 45 class WritableFile; 46 47 class FileSystem; 48 struct TransactionToken { 49 FileSystem* owner; 50 void* token; 51 }; 52 53 /// A generic interface for accessing a file system. Implementations 54 /// of custom filesystem adapters must implement this interface, 55 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. 56 class FileSystem { 57 public: 58 /// \brief Creates a brand new random access read-only file with the 59 /// specified name. 60 /// 61 /// On success, stores a pointer to the new file in 62 /// *result and returns OK. On failure stores NULL in *result and 63 /// returns non-OK. If the file does not exist, returns a non-OK 64 /// status. 65 /// 66 /// The returned file may be concurrently accessed by multiple threads. 67 /// 68 /// The ownership of the returned RandomAccessFile is passed to the caller 69 /// and the object should be deleted when is not used. NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result)70 virtual tensorflow::Status NewRandomAccessFile( 71 const std::string& fname, std::unique_ptr<RandomAccessFile>* result) { 72 return NewRandomAccessFile(fname, nullptr, result); 73 } 74 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)75 virtual tensorflow::Status NewRandomAccessFile( 76 const std::string& fname, TransactionToken* token, 77 std::unique_ptr<RandomAccessFile>* result) { 78 // We duplicate these methods due to Google internal coding style prevents 79 // virtual functions with default arguments. See PR #41615. 80 return Status::OK(); 81 } 82 83 /// \brief Creates an object that writes to a new file with the specified 84 /// name. 85 /// 86 /// Deletes any existing file with the same name and creates a 87 /// new file. On success, stores a pointer to the new file in 88 /// *result and returns OK. On failure stores NULL in *result and 89 /// returns non-OK. 90 /// 91 /// The returned file will only be accessed by one thread at a time. 92 /// 93 /// The ownership of the returned WritableFile is passed to the caller 94 /// and the object should be deleted when is not used. NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)95 virtual tensorflow::Status NewWritableFile( 96 const std::string& fname, std::unique_ptr<WritableFile>* result) { 97 return NewWritableFile(fname, nullptr, result); 98 } 99 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)100 virtual tensorflow::Status NewWritableFile( 101 const std::string& fname, TransactionToken* token, 102 std::unique_ptr<WritableFile>* result) { 103 return Status::OK(); 104 } 105 106 /// \brief Creates an object that either appends to an existing file, or 107 /// writes to a new file (if the file does not exist to begin with). 108 /// 109 /// On success, stores a pointer to the new file in *result and 110 /// returns OK. On failure stores NULL in *result and returns 111 /// non-OK. 112 /// 113 /// The returned file will only be accessed by one thread at a time. 114 /// 115 /// The ownership of the returned WritableFile is passed to the caller 116 /// and the object should be deleted when is not used. NewAppendableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)117 virtual tensorflow::Status NewAppendableFile( 118 const std::string& fname, std::unique_ptr<WritableFile>* result) { 119 return NewAppendableFile(fname, nullptr, result); 120 } 121 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)122 virtual tensorflow::Status NewAppendableFile( 123 const std::string& fname, TransactionToken* token, 124 std::unique_ptr<WritableFile>* result) { 125 return Status::OK(); 126 } 127 128 /// \brief Creates a readonly region of memory with the file context. 129 /// 130 /// On success, it returns a pointer to read-only memory region 131 /// from the content of file fname. The ownership of the region is passed to 132 /// the caller. On failure stores nullptr in *result and returns non-OK. 133 /// 134 /// The returned memory region can be accessed from many threads in parallel. 135 /// 136 /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller 137 /// and the object should be deleted when is not used. NewReadOnlyMemoryRegionFromFile(const std::string & fname,std::unique_ptr<ReadOnlyMemoryRegion> * result)138 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 139 const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) { 140 return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); 141 } 142 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)143 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 144 const std::string& fname, TransactionToken* token, 145 std::unique_ptr<ReadOnlyMemoryRegion>* result) { 146 return Status::OK(); 147 } 148 149 /// Returns OK if the named path exists and NOT_FOUND otherwise. FileExists(const std::string & fname)150 virtual tensorflow::Status FileExists(const std::string& fname) { 151 return FileExists(fname, nullptr); 152 } 153 FileExists(const std::string & fname,TransactionToken * token)154 virtual tensorflow::Status FileExists(const std::string& fname, 155 TransactionToken* token) { 156 return Status::OK(); 157 } 158 159 /// Returns true if all the listed files exist, false otherwise. 160 /// if status is not null, populate the vector with a detailed status 161 /// for each file. FilesExist(const std::vector<string> & files,std::vector<Status> * status)162 virtual bool FilesExist(const std::vector<string>& files, 163 std::vector<Status>* status) { 164 return FilesExist(files, nullptr, status); 165 } 166 167 virtual bool FilesExist(const std::vector<string>& files, 168 TransactionToken* token, std::vector<Status>* status); 169 170 /// \brief Returns the immediate children in the given directory. 171 /// 172 /// The returned paths are relative to 'dir'. GetChildren(const std::string & dir,std::vector<string> * result)173 virtual tensorflow::Status GetChildren(const std::string& dir, 174 std::vector<string>* result) { 175 return GetChildren(dir, nullptr, result); 176 } 177 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)178 virtual tensorflow::Status GetChildren(const std::string& dir, 179 TransactionToken* token, 180 std::vector<string>* result) { 181 return Status::OK(); 182 } 183 184 /// \brief Given a pattern, stores in *results the set of paths that matches 185 /// that pattern. *results is cleared. 186 /// 187 /// pattern must match all of a name, not just a substring. 188 /// 189 /// pattern: { term } 190 /// term: 191 /// '*': matches any sequence of non-'/' characters 192 /// '?': matches a single non-'/' character 193 /// '[' [ '^' ] { match-list } ']': 194 /// matches any single character (not) on the list 195 /// c: matches character c (c != '*', '?', '\\', '[') 196 /// '\\' c: matches character c 197 /// character-range: 198 /// c: matches character c (c != '\\', '-', ']') 199 /// '\\' c: matches character c 200 /// lo '-' hi: matches character c for lo <= c <= hi 201 /// 202 /// Typical return codes: 203 /// * OK - no errors 204 /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not 205 /// implemented GetMatchingPaths(const std::string & pattern,std::vector<string> * results)206 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 207 std::vector<string>* results) { 208 return GetMatchingPaths(pattern, nullptr, results); 209 } 210 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)211 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 212 TransactionToken* token, 213 std::vector<string>* results) { 214 return Status::OK(); 215 } 216 217 /// \brief Checks if the given filename matches the pattern. 218 /// 219 /// This function provides the equivalent of posix fnmatch, however it is 220 /// implemented without fnmatch to ensure that this can be used for cloud 221 /// filesystems on windows. For windows filesystems, it uses PathMatchSpec. 222 virtual bool Match(const std::string& filename, const std::string& pattern); 223 224 /// \brief Obtains statistics for the given path. Stat(const std::string & fname,FileStatistics * stat)225 virtual tensorflow::Status Stat(const std::string& fname, 226 FileStatistics* stat) { 227 return Stat(fname, nullptr, stat); 228 } 229 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)230 virtual tensorflow::Status Stat(const std::string& fname, 231 TransactionToken* token, 232 FileStatistics* stat) { 233 return Status::OK(); 234 } 235 236 /// \brief Deletes the named file. DeleteFile(const std::string & fname)237 virtual tensorflow::Status DeleteFile(const std::string& fname) { 238 return DeleteFile(fname, nullptr); 239 } 240 DeleteFile(const std::string & fname,TransactionToken * token)241 virtual tensorflow::Status DeleteFile(const std::string& fname, 242 TransactionToken* token) { 243 return Status::OK(); 244 } 245 246 /// \brief Creates the specified directory. 247 /// Typical return codes: 248 /// * OK - successfully created the directory. 249 /// * ALREADY_EXISTS - directory with name dirname already exists. 250 /// * PERMISSION_DENIED - dirname is not writable. CreateDir(const std::string & dirname)251 virtual tensorflow::Status CreateDir(const std::string& dirname) { 252 return CreateDir(dirname, nullptr); 253 } 254 CreateDir(const std::string & dirname,TransactionToken * token)255 virtual tensorflow::Status CreateDir(const std::string& dirname, 256 TransactionToken* token) { 257 return Status::OK(); 258 } 259 260 /// \brief Creates the specified directory and all the necessary 261 /// subdirectories. 262 /// Typical return codes: 263 /// * OK - successfully created the directory and sub directories, even if 264 /// they were already created. 265 /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. RecursivelyCreateDir(const std::string & dirname)266 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) { 267 return RecursivelyCreateDir(dirname, nullptr); 268 } 269 270 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 271 TransactionToken* token); 272 273 /// \brief Deletes the specified directory. DeleteDir(const std::string & dirname)274 virtual tensorflow::Status DeleteDir(const std::string& dirname) { 275 return DeleteDir(dirname, nullptr); 276 } 277 DeleteDir(const std::string & dirname,TransactionToken * token)278 virtual tensorflow::Status DeleteDir(const std::string& dirname, 279 TransactionToken* token) { 280 return Status::OK(); 281 } 282 283 /// \brief Deletes the specified directory and all subdirectories and files 284 /// underneath it. This is accomplished by traversing the directory tree 285 /// rooted at dirname and deleting entries as they are encountered. 286 /// 287 /// If dirname itself is not readable or does not exist, *undeleted_dir_count 288 /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status 289 /// (e.g. NOT_FOUND) is returned. 290 /// 291 /// If dirname and all its descendants were successfully deleted, TF_OK is 292 /// returned and both error counters are set to zero. 293 /// 294 /// Otherwise, while traversing the tree, undeleted_file_count and 295 /// undeleted_dir_count are updated if an entry of the corresponding type 296 /// could not be deleted. The returned error status represents the reason that 297 /// any one of these entries could not be deleted. 298 /// 299 /// REQUIRES: undeleted_files, undeleted_dirs to be not null. 300 /// 301 /// Typical return codes: 302 /// * OK - dirname exists and we were able to delete everything underneath. 303 /// * NOT_FOUND - dirname doesn't exist 304 /// * PERMISSION_DENIED - dirname or some descendant is not writable 305 /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not 306 /// implemented DeleteRecursively(const std::string & dirname,int64 * undeleted_files,int64 * undeleted_dirs)307 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 308 int64* undeleted_files, 309 int64* undeleted_dirs) { 310 return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); 311 } 312 313 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 314 TransactionToken* token, 315 int64* undeleted_files, 316 int64* undeleted_dirs); 317 318 /// \brief Stores the size of `fname` in `*file_size`. GetFileSize(const std::string & fname,uint64 * file_size)319 virtual tensorflow::Status GetFileSize(const std::string& fname, 320 uint64* file_size) { 321 return GetFileSize(fname, nullptr, file_size); 322 } 323 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)324 virtual tensorflow::Status GetFileSize(const std::string& fname, 325 TransactionToken* token, 326 uint64* file_size) { 327 return Status::OK(); 328 } 329 330 /// \brief Overwrites the target if it exists. RenameFile(const std::string & src,const std::string & target)331 virtual tensorflow::Status RenameFile(const std::string& src, 332 const std::string& target) { 333 return RenameFile(src, target, nullptr); 334 } 335 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)336 virtual tensorflow::Status RenameFile(const std::string& src, 337 const std::string& target, 338 TransactionToken* token) { 339 return Status::OK(); 340 } 341 342 /// \brief Copy the src to target. CopyFile(const std::string & src,const std::string & target)343 virtual tensorflow::Status CopyFile(const std::string& src, 344 const std::string& target) { 345 return CopyFile(src, target, nullptr); 346 } 347 348 virtual tensorflow::Status CopyFile(const std::string& src, 349 const std::string& target, 350 TransactionToken* token); 351 352 /// \brief Translate an URI to a filename for the FileSystem implementation. 353 /// 354 /// The implementation in this class cleans up the path, removing 355 /// duplicate /'s, resolving .. and removing trailing '/'. 356 /// This respects relative vs. absolute paths, but does not 357 /// invoke any system calls (getcwd(2)) in order to resolve relative 358 /// paths with respect to the actual working directory. That is, this is 359 /// purely string manipulation, completely independent of process state. 360 virtual std::string TranslateName(const std::string& name) const; 361 362 /// \brief Returns whether the given path is a directory or not. 363 /// 364 /// Typical return codes (not guaranteed exhaustive): 365 /// * OK - The path exists and is a directory. 366 /// * FAILED_PRECONDITION - The path exists and is not a directory. 367 /// * NOT_FOUND - The path entry does not exist. 368 /// * PERMISSION_DENIED - Insufficient permissions. 369 /// * UNIMPLEMENTED - The file factory doesn't support directories. IsDirectory(const std::string & fname)370 virtual tensorflow::Status IsDirectory(const std::string& fname) { 371 return IsDirectory(fname, nullptr); 372 } 373 374 virtual tensorflow::Status IsDirectory(const std::string& fname, 375 TransactionToken* token); 376 377 /// \brief Returns whether the given path is on a file system 378 /// that has atomic move capabilities. This can be used 379 /// to determine if there needs to be a temp location to safely write objects. 380 /// The second boolean argument has_atomic_move contains this information. 381 /// 382 /// Returns one of the following status codes (not guaranteed exhaustive): 383 /// * OK - The path is on a recognized file system, 384 /// so has_atomic_move holds the above information. 385 /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in 386 /// TF 387 virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); 388 389 /// \brief Flushes any cached filesystem objects from memory. FlushCaches()390 virtual void FlushCaches() { FlushCaches(nullptr); } 391 392 virtual void FlushCaches(TransactionToken* token); 393 394 /// \brief The separator this filesystem uses. 395 /// 396 /// This is implemented as a part of the filesystem, because even on windows, 397 /// a user may need access to filesystems with '/' separators, such as cloud 398 /// filesystems. 399 virtual char Separator() const; 400 401 /// \brief Split a path to its basename and dirname. 402 /// 403 /// Helper function for Basename and Dirname. 404 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const; 405 406 /// \brief returns the final file name in the given path. 407 /// 408 /// Returns the part of the path after the final "/". If there is no 409 /// "/" in the path, the result is the same as the input. 410 virtual StringPiece Basename(StringPiece path) const; 411 412 /// \brief Returns the part of the path before the final "/". 413 /// 414 /// If there is a single leading "/" in the path, the result will be the 415 /// leading "/". If there is no "/" in the path, the result is the empty 416 /// prefix of the input. 417 StringPiece Dirname(StringPiece path) const; 418 419 /// \brief Returns the part of the basename of path after the final ".". 420 /// 421 /// If there is no "." in the basename, the result is empty. 422 StringPiece Extension(StringPiece path) const; 423 424 /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".". 425 /// 426 /// NOTE: This respects relative vs. absolute paths, but does not 427 /// invoke any system calls (getcwd(2)) in order to resolve relative 428 /// paths with respect to the actual working directory. That is, this is 429 /// purely string manipulation, completely independent of process state. 430 std::string CleanPath(StringPiece path) const; 431 432 /// \brief Creates a URI from a scheme, host, and path. 433 /// 434 /// If the scheme is empty, we just return the path. 435 std::string CreateURI(StringPiece scheme, StringPiece host, 436 StringPiece path) const; 437 438 /// \brief Creates a temporary file name with an extension. 439 std::string GetTempFilename(const std::string& extension) const; 440 441 /// \brief Return true if path is absolute. 442 bool IsAbsolutePath(tensorflow::StringPiece path) const; 443 444 #ifndef SWIG // variadic templates 445 /// \brief Join multiple paths together. 446 /// 447 /// This function also removes the unnecessary path separators. 448 /// For example: 449 /// 450 /// Arguments | JoinPath 451 /// ---------------------------+---------- 452 /// '/foo', 'bar' | /foo/bar 453 /// '/foo/', 'bar' | /foo/bar 454 /// '/foo', '/bar' | /foo/bar 455 /// 456 /// Usage: 457 /// string path = io::JoinPath("/mydir", filename); 458 /// string path = io::JoinPath(FLAGS_test_srcdir, filename); 459 /// string path = io::JoinPath("/full", "path", "to", "filename"); 460 template <typename... T> JoinPath(const T &...args)461 std::string JoinPath(const T&... args) { 462 return JoinPathImpl({args...}); 463 } 464 #endif /* SWIG */ 465 466 std::string JoinPathImpl( 467 std::initializer_list<tensorflow::StringPiece> paths); 468 469 /// \brief Populates the scheme, host, and path from a URI. 470 /// 471 /// scheme, host, and path are guaranteed by this function to point into the 472 /// contents of uri, even if empty. 473 /// 474 /// Corner cases: 475 /// - If the URI is invalid, scheme and host are set to empty strings and the 476 /// passed string is assumed to be a path 477 /// - If the URI omits the path (e.g. file://host), then the path is left 478 /// empty. 479 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, 480 StringPiece* path) const; 481 482 // Transaction related API 483 484 /// \brief Starts a new transaction StartTransaction(TransactionToken ** token)485 virtual tensorflow::Status StartTransaction(TransactionToken** token) { 486 *token = nullptr; 487 return Status::OK(); 488 } 489 490 /// \brief Adds `path` to transaction in `token` AddToTransaction(const std::string & path,TransactionToken * token)491 virtual tensorflow::Status AddToTransaction(const std::string& path, 492 TransactionToken* token) { 493 return Status::OK(); 494 } 495 496 /// \brief Ends transaction EndTransaction(TransactionToken * token)497 virtual tensorflow::Status EndTransaction(TransactionToken* token) { 498 return Status::OK(); 499 } 500 501 /// \brief Get token for `path` or start a new transaction and add `path` to 502 /// it. GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)503 virtual tensorflow::Status GetTokenOrStartTransaction( 504 const std::string& path, TransactionToken** token) { 505 *token = nullptr; 506 return Status::OK(); 507 } 508 509 /// \brief Return transaction for `path` or nullptr in `token` GetTransactionForPath(const std::string & path,TransactionToken ** token)510 virtual tensorflow::Status GetTransactionForPath(const std::string& path, 511 TransactionToken** token) { 512 *token = nullptr; 513 return Status::OK(); 514 } 515 516 /// \brief Decode transaction to human readable string. 517 virtual std::string DecodeTransaction(const TransactionToken* token); 518 519 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<string> & values)520 virtual tensorflow::Status SetOption(const std::string& name, 521 const std::vector<string>& values) { 522 return errors::Unimplemented("SetOption"); 523 } 524 525 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<int64> & values)526 virtual tensorflow::Status SetOption(const std::string& name, 527 const std::vector<int64>& values) { 528 return errors::Unimplemented("SetOption"); 529 } 530 531 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<double> & values)532 virtual tensorflow::Status SetOption(const std::string& name, 533 const std::vector<double>& values) { 534 return errors::Unimplemented("SetOption"); 535 } 536 FileSystem()537 FileSystem() {} 538 539 virtual ~FileSystem() = default; 540 }; 541 /// This macro adds forwarding methods from FileSystem class to 542 /// used class since name hiding will prevent these to be accessed from 543 /// derived classes and would require all use locations to migrate to 544 /// Transactional API. This is an interim solution until ModularFileSystem class 545 /// becomes a singleton. 546 // TODO(sami): Remove this macro when filesystem plugins migration is complete. 547 #define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ 548 using FileSystem::NewRandomAccessFile; \ 549 using FileSystem::NewWritableFile; \ 550 using FileSystem::NewAppendableFile; \ 551 using FileSystem::NewReadOnlyMemoryRegionFromFile; \ 552 using FileSystem::FileExists; \ 553 using FileSystem::GetChildren; \ 554 using FileSystem::GetMatchingPaths; \ 555 using FileSystem::Stat; \ 556 using FileSystem::DeleteFile; \ 557 using FileSystem::RecursivelyCreateDir; \ 558 using FileSystem::DeleteDir; \ 559 using FileSystem::DeleteRecursively; \ 560 using FileSystem::GetFileSize; \ 561 using FileSystem::RenameFile; \ 562 using FileSystem::CopyFile; \ 563 using FileSystem::IsDirectory; \ 564 using FileSystem::FlushCaches 565 566 /// A Wrapper class for Transactional FileSystem support. 567 /// This provides means to make use of the transactions with minimal code change 568 /// Any operations that are done through this interface will be through the 569 /// transaction created at the time of construction of this instance. 570 /// See FileSystem documentation for method descriptions. 571 /// This class simply forwards all calls to wrapped filesystem either with given 572 /// transaction token or with token used in its construction. This allows doing 573 /// transactional filesystem access with minimal code change. 574 class WrappedFileSystem : public FileSystem { 575 public: 576 TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; 577 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)578 tensorflow::Status NewRandomAccessFile( 579 const std::string& fname, TransactionToken* token, 580 std::unique_ptr<RandomAccessFile>* result) override { 581 return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); 582 } 583 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)584 tensorflow::Status NewWritableFile( 585 const std::string& fname, TransactionToken* token, 586 std::unique_ptr<WritableFile>* result) override { 587 return fs_->NewWritableFile(fname, (token ? token : token_), result); 588 } 589 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)590 tensorflow::Status NewAppendableFile( 591 const std::string& fname, TransactionToken* token, 592 std::unique_ptr<WritableFile>* result) override { 593 return fs_->NewAppendableFile(fname, (token ? token : token_), result); 594 } 595 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)596 tensorflow::Status NewReadOnlyMemoryRegionFromFile( 597 const std::string& fname, TransactionToken* token, 598 std::unique_ptr<ReadOnlyMemoryRegion>* result) override { 599 return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), 600 result); 601 } 602 FileExists(const std::string & fname,TransactionToken * token)603 tensorflow::Status FileExists(const std::string& fname, 604 TransactionToken* token) override { 605 return fs_->FileExists(fname, (token ? token : token_)); 606 } 607 FilesExist(const std::vector<string> & files,TransactionToken * token,std::vector<Status> * status)608 bool FilesExist(const std::vector<string>& files, TransactionToken* token, 609 std::vector<Status>* status) override { 610 return fs_->FilesExist(files, (token ? token : token_), status); 611 } 612 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)613 tensorflow::Status GetChildren(const std::string& dir, 614 TransactionToken* token, 615 std::vector<string>* result) override { 616 return fs_->GetChildren(dir, (token ? token : token_), result); 617 } 618 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)619 tensorflow::Status GetMatchingPaths(const std::string& pattern, 620 TransactionToken* token, 621 std::vector<string>* results) override { 622 return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); 623 } 624 Match(const std::string & filename,const std::string & pattern)625 bool Match(const std::string& filename, const std::string& pattern) override { 626 return fs_->Match(filename, pattern); 627 } 628 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)629 tensorflow::Status Stat(const std::string& fname, TransactionToken* token, 630 FileStatistics* stat) override { 631 return fs_->Stat(fname, (token ? token : token_), stat); 632 } 633 DeleteFile(const std::string & fname,TransactionToken * token)634 tensorflow::Status DeleteFile(const std::string& fname, 635 TransactionToken* token) override { 636 return fs_->DeleteFile(fname, (token ? token : token_)); 637 } 638 CreateDir(const std::string & dirname,TransactionToken * token)639 tensorflow::Status CreateDir(const std::string& dirname, 640 TransactionToken* token) override { 641 return fs_->CreateDir(dirname, (token ? token : token_)); 642 } 643 RecursivelyCreateDir(const std::string & dirname,TransactionToken * token)644 tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 645 TransactionToken* token) override { 646 return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); 647 } 648 DeleteDir(const std::string & dirname,TransactionToken * token)649 tensorflow::Status DeleteDir(const std::string& dirname, 650 TransactionToken* token) override { 651 return fs_->DeleteDir(dirname, (token ? token : token_)); 652 } 653 DeleteRecursively(const std::string & dirname,TransactionToken * token,int64 * undeleted_files,int64 * undeleted_dirs)654 tensorflow::Status DeleteRecursively(const std::string& dirname, 655 TransactionToken* token, 656 int64* undeleted_files, 657 int64* undeleted_dirs) override { 658 return fs_->DeleteRecursively(dirname, (token ? token : token_), 659 undeleted_files, undeleted_dirs); 660 } 661 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)662 tensorflow::Status GetFileSize(const std::string& fname, 663 TransactionToken* token, 664 uint64* file_size) override { 665 return fs_->GetFileSize(fname, (token ? token : token_), file_size); 666 } 667 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)668 tensorflow::Status RenameFile(const std::string& src, 669 const std::string& target, 670 TransactionToken* token) override { 671 return fs_->RenameFile(src, target, (token ? token : token_)); 672 } 673 CopyFile(const std::string & src,const std::string & target,TransactionToken * token)674 tensorflow::Status CopyFile(const std::string& src, const std::string& target, 675 TransactionToken* token) override { 676 return fs_->CopyFile(src, target, (token ? token : token_)); 677 } 678 TranslateName(const std::string & name)679 std::string TranslateName(const std::string& name) const override { 680 return fs_->TranslateName(name); 681 } 682 IsDirectory(const std::string & fname,TransactionToken * token)683 tensorflow::Status IsDirectory(const std::string& fname, 684 TransactionToken* token) override { 685 return fs_->IsDirectory(fname, (token ? token : token_)); 686 } 687 HasAtomicMove(const std::string & path,bool * has_atomic_move)688 Status HasAtomicMove(const std::string& path, 689 bool* has_atomic_move) override { 690 return fs_->HasAtomicMove(path, has_atomic_move); 691 } 692 FlushCaches(TransactionToken * token)693 void FlushCaches(TransactionToken* token) override { 694 return fs_->FlushCaches((token ? token : token_)); 695 } 696 Separator()697 char Separator() const override { return fs_->Separator(); } 698 Basename(StringPiece path)699 StringPiece Basename(StringPiece path) const override { 700 return fs_->Basename(path); 701 } 702 StartTransaction(TransactionToken ** token)703 tensorflow::Status StartTransaction(TransactionToken** token) override { 704 return fs_->StartTransaction(token); 705 } 706 AddToTransaction(const std::string & path,TransactionToken * token)707 tensorflow::Status AddToTransaction(const std::string& path, 708 TransactionToken* token) override { 709 return fs_->AddToTransaction(path, (token ? token : token_)); 710 } 711 EndTransaction(TransactionToken * token)712 tensorflow::Status EndTransaction(TransactionToken* token) override { 713 return fs_->EndTransaction(token); 714 } 715 GetTransactionForPath(const std::string & path,TransactionToken ** token)716 tensorflow::Status GetTransactionForPath(const std::string& path, 717 TransactionToken** token) override { 718 return fs_->GetTransactionForPath(path, token); 719 } 720 GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)721 tensorflow::Status GetTokenOrStartTransaction( 722 const std::string& path, TransactionToken** token) override { 723 return fs_->GetTokenOrStartTransaction(path, token); 724 } 725 DecodeTransaction(const TransactionToken * token)726 std::string DecodeTransaction(const TransactionToken* token) override { 727 return fs_->DecodeTransaction((token ? token : token_)); 728 } 729 WrappedFileSystem(FileSystem * file_system,TransactionToken * token)730 WrappedFileSystem(FileSystem* file_system, TransactionToken* token) 731 : fs_(file_system), token_(token) {} 732 733 ~WrappedFileSystem() override = default; 734 735 private: 736 FileSystem* fs_; 737 TransactionToken* token_; 738 }; 739 740 /// A file abstraction for randomly reading the contents of a file. 741 class RandomAccessFile { 742 public: RandomAccessFile()743 RandomAccessFile() {} 744 virtual ~RandomAccessFile() = default; 745 746 /// \brief Returns the name of the file. 747 /// 748 /// This is an optional operation that may not be implemented by every 749 /// filesystem. Name(StringPiece * result)750 virtual tensorflow::Status Name(StringPiece* result) const { 751 return errors::Unimplemented("This filesystem does not support Name()"); 752 } 753 754 /// \brief Reads up to `n` bytes from the file starting at `offset`. 755 /// 756 /// `scratch[0..n-1]` may be written by this routine. Sets `*result` 757 /// to the data that was read (including if fewer than `n` bytes were 758 /// successfully read). May set `*result` to point at data in 759 /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when 760 /// `*result` is used. 761 /// 762 /// On OK returned status: `n` bytes have been stored in `*result`. 763 /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. 764 /// 765 /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` 766 /// because of EOF. 767 /// 768 /// Safe for concurrent use by multiple threads. 769 virtual tensorflow::Status Read(uint64 offset, size_t n, StringPiece* result, 770 char* scratch) const = 0; 771 772 #if defined(TF_CORD_SUPPORT) 773 /// \brief Read up to `n` bytes from the file starting at `offset`. Read(uint64 offset,size_t n,absl::Cord * cord)774 virtual tensorflow::Status Read(uint64 offset, size_t n, 775 absl::Cord* cord) const { 776 return errors::Unimplemented( 777 "Read(uint64, size_t, absl::Cord*) is not " 778 "implemented"); 779 } 780 #endif 781 782 private: 783 TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); 784 }; 785 786 /// \brief A file abstraction for sequential writing. 787 /// 788 /// The implementation must provide buffering since callers may append 789 /// small fragments at a time to the file. 790 class WritableFile { 791 public: WritableFile()792 WritableFile() {} 793 virtual ~WritableFile() = default; 794 795 /// \brief Append 'data' to the file. 796 virtual tensorflow::Status Append(StringPiece data) = 0; 797 798 #if defined(TF_CORD_SUPPORT) 799 // \brief Append 'data' to the file. Append(const absl::Cord & cord)800 virtual tensorflow::Status Append(const absl::Cord& cord) { 801 for (StringPiece chunk : cord.Chunks()) { 802 TF_RETURN_IF_ERROR(Append(chunk)); 803 } 804 return tensorflow::Status::OK(); 805 } 806 #endif 807 808 /// \brief Close the file. 809 /// 810 /// Flush() and de-allocate resources associated with this file 811 /// 812 /// Typical return codes (not guaranteed to be exhaustive): 813 /// * OK 814 /// * Other codes, as returned from Flush() 815 virtual tensorflow::Status Close() = 0; 816 817 /// \brief Flushes the file and optionally syncs contents to filesystem. 818 /// 819 /// This should flush any local buffers whose contents have not been 820 /// delivered to the filesystem. 821 /// 822 /// If the process terminates after a successful flush, the contents 823 /// may still be persisted, since the underlying filesystem may 824 /// eventually flush the contents. If the OS or machine crashes 825 /// after a successful flush, the contents may or may not be 826 /// persisted, depending on the implementation. 827 virtual tensorflow::Status Flush() = 0; 828 829 // \brief Returns the name of the file. 830 /// 831 /// This is an optional operation that may not be implemented by every 832 /// filesystem. Name(StringPiece * result)833 virtual tensorflow::Status Name(StringPiece* result) const { 834 return errors::Unimplemented("This filesystem does not support Name()"); 835 } 836 837 /// \brief Syncs contents of file to filesystem. 838 /// 839 /// This waits for confirmation from the filesystem that the contents 840 /// of the file have been persisted to the filesystem; if the OS 841 /// or machine crashes after a successful Sync, the contents should 842 /// be properly saved. 843 virtual tensorflow::Status Sync() = 0; 844 845 /// \brief Retrieves the current write position in the file, or -1 on 846 /// error. 847 /// 848 /// This is an optional operation, subclasses may choose to return 849 /// errors::Unimplemented. Tell(int64 * position)850 virtual tensorflow::Status Tell(int64* position) { 851 *position = -1; 852 return errors::Unimplemented("This filesystem does not support Tell()"); 853 } 854 855 private: 856 TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); 857 }; 858 859 /// \brief A readonly memmapped file abstraction. 860 /// 861 /// The implementation must guarantee that all memory is accessible when the 862 /// object exists, independently from the Env that created it. 863 class ReadOnlyMemoryRegion { 864 public: ReadOnlyMemoryRegion()865 ReadOnlyMemoryRegion() {} 866 virtual ~ReadOnlyMemoryRegion() = default; 867 868 /// \brief Returns a pointer to the memory region. 869 virtual const void* data() = 0; 870 871 /// \brief Returns the length of the memory region in bytes. 872 virtual uint64 length() = 0; 873 }; 874 875 /// \brief A registry for file system implementations. 876 /// 877 /// Filenames are specified as an URI, which is of the form 878 /// [scheme://]<filename>. 879 /// File system implementations are registered using the REGISTER_FILE_SYSTEM 880 /// macro, providing the 'scheme' as the key. 881 /// 882 /// There are two `Register` methods: one using `Factory` for legacy filesystems 883 /// (deprecated mechanism of subclassing `FileSystem` and using 884 /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>` 885 /// for the new modular approach. 886 /// 887 /// Note that the new API expects a pointer to `ModularFileSystem` but this is 888 /// not checked as there should be exactly one caller to the API and doing the 889 /// check results in a circular dependency between `BUILD` targets. 890 /// 891 /// Plan is to completely remove the filesystem registration from `Env` and 892 /// incorporate it into `ModularFileSystem` class (which will be renamed to be 893 /// the only `FileSystem` class and marked as `final`). But this will happen at 894 /// a later time, after we convert all filesystems to the new API. 895 /// 896 /// TODO(mihaimaruseac): After all filesystems are converted, remove old 897 /// registration and update comment. 898 class FileSystemRegistry { 899 public: 900 typedef std::function<FileSystem*()> Factory; 901 902 virtual ~FileSystemRegistry() = default; 903 virtual tensorflow::Status Register(const std::string& scheme, 904 Factory factory) = 0; 905 virtual tensorflow::Status Register( 906 const std::string& scheme, std::unique_ptr<FileSystem> filesystem) = 0; 907 virtual FileSystem* Lookup(const std::string& scheme) = 0; 908 virtual tensorflow::Status GetRegisteredFileSystemSchemes( 909 std::vector<std::string>* schemes) = 0; 910 }; 911 912 } // namespace tensorflow 913 914 #endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 915