1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 17 #define TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 18 19 #include <stdint.h> 20 21 #include <functional> 22 #include <string> 23 #include <unordered_map> 24 #include <utility> 25 #include <vector> 26 27 #include "tensorflow/core/platform/cord.h" 28 #include "tensorflow/core/platform/errors.h" 29 #include "tensorflow/core/platform/file_statistics.h" 30 #include "tensorflow/core/platform/macros.h" 31 #include "tensorflow/core/platform/platform.h" 32 #include "tensorflow/core/platform/stringpiece.h" 33 #include "tensorflow/core/platform/types.h" 34 35 #ifdef PLATFORM_WINDOWS 36 #undef DeleteFile 37 #undef CopyFile 38 #undef TranslateName 39 #endif 40 41 namespace tensorflow { 42 43 class RandomAccessFile; 44 class ReadOnlyMemoryRegion; 45 class WritableFile; 46 47 class FileSystem; 48 struct TransactionToken { 49 FileSystem* owner; 50 void* token; 51 }; 52 53 /// A generic interface for accessing a file system. Implementations 54 /// of custom filesystem adapters must implement this interface, 55 /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. 56 class FileSystem { 57 public: 58 /// \brief Creates a brand new random access read-only file with the 59 /// specified name. 60 /// 61 /// On success, stores a pointer to the new file in 62 /// *result and returns OK. On failure stores NULL in *result and 63 /// returns non-OK. If the file does not exist, returns a non-OK 64 /// status. 65 /// 66 /// The returned file may be concurrently accessed by multiple threads. 67 /// 68 /// The ownership of the returned RandomAccessFile is passed to the caller 69 /// and the object should be deleted when is not used. NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result)70 virtual tensorflow::Status NewRandomAccessFile( 71 const std::string& fname, std::unique_ptr<RandomAccessFile>* result) { 72 return NewRandomAccessFile(fname, nullptr, result); 73 } 74 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)75 virtual tensorflow::Status NewRandomAccessFile( 76 const std::string& fname, TransactionToken* token, 77 std::unique_ptr<RandomAccessFile>* result) { 78 // We duplicate these methods due to Google internal coding style prevents 79 // virtual functions with default arguments. See PR #41615. 80 return OkStatus(); 81 } 82 83 /// \brief Creates an object that writes to a new file with the specified 84 /// name. 85 /// 86 /// Deletes any existing file with the same name and creates a 87 /// new file. On success, stores a pointer to the new file in 88 /// *result and returns OK. On failure stores NULL in *result and 89 /// returns non-OK. 90 /// 91 /// The returned file will only be accessed by one thread at a time. 92 /// 93 /// The ownership of the returned WritableFile is passed to the caller 94 /// and the object should be deleted when is not used. NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)95 virtual tensorflow::Status NewWritableFile( 96 const std::string& fname, std::unique_ptr<WritableFile>* result) { 97 return NewWritableFile(fname, nullptr, result); 98 } 99 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)100 virtual tensorflow::Status NewWritableFile( 101 const std::string& fname, TransactionToken* token, 102 std::unique_ptr<WritableFile>* result) { 103 return OkStatus(); 104 } 105 106 /// \brief Creates an object that either appends to an existing file, or 107 /// writes to a new file (if the file does not exist to begin with). 108 /// 109 /// On success, stores a pointer to the new file in *result and 110 /// returns OK. On failure stores NULL in *result and returns 111 /// non-OK. 112 /// 113 /// The returned file will only be accessed by one thread at a time. 114 /// 115 /// The ownership of the returned WritableFile is passed to the caller 116 /// and the object should be deleted when is not used. NewAppendableFile(const std::string & fname,std::unique_ptr<WritableFile> * result)117 virtual tensorflow::Status NewAppendableFile( 118 const std::string& fname, std::unique_ptr<WritableFile>* result) { 119 return NewAppendableFile(fname, nullptr, result); 120 } 121 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)122 virtual tensorflow::Status NewAppendableFile( 123 const std::string& fname, TransactionToken* token, 124 std::unique_ptr<WritableFile>* result) { 125 return OkStatus(); 126 } 127 128 /// \brief Creates a readonly region of memory with the file context. 129 /// 130 /// On success, it returns a pointer to read-only memory region 131 /// from the content of file fname. The ownership of the region is passed to 132 /// the caller. On failure stores nullptr in *result and returns non-OK. 133 /// 134 /// The returned memory region can be accessed from many threads in parallel. 135 /// 136 /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller 137 /// and the object should be deleted when is not used. NewReadOnlyMemoryRegionFromFile(const std::string & fname,std::unique_ptr<ReadOnlyMemoryRegion> * result)138 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 139 const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) { 140 return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); 141 } 142 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)143 virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( 144 const std::string& fname, TransactionToken* token, 145 std::unique_ptr<ReadOnlyMemoryRegion>* result) { 146 return OkStatus(); 147 } 148 149 /// Returns OK if the named path exists and NOT_FOUND otherwise. FileExists(const std::string & fname)150 virtual tensorflow::Status FileExists(const std::string& fname) { 151 return FileExists(fname, nullptr); 152 } 153 FileExists(const std::string & fname,TransactionToken * token)154 virtual tensorflow::Status FileExists(const std::string& fname, 155 TransactionToken* token) { 156 return OkStatus(); 157 } 158 159 /// Returns true if all the listed files exist, false otherwise. 160 /// if status is not null, populate the vector with a detailed status 161 /// for each file. FilesExist(const std::vector<string> & files,std::vector<Status> * status)162 virtual bool FilesExist(const std::vector<string>& files, 163 std::vector<Status>* status) { 164 return FilesExist(files, nullptr, status); 165 } 166 167 virtual bool FilesExist(const std::vector<string>& files, 168 TransactionToken* token, std::vector<Status>* status); 169 170 /// \brief Returns the immediate children in the given directory. 171 /// 172 /// The returned paths are relative to 'dir'. GetChildren(const std::string & dir,std::vector<string> * result)173 virtual tensorflow::Status GetChildren(const std::string& dir, 174 std::vector<string>* result) { 175 return GetChildren(dir, nullptr, result); 176 } 177 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)178 virtual tensorflow::Status GetChildren(const std::string& dir, 179 TransactionToken* token, 180 std::vector<string>* result) { 181 return OkStatus(); 182 } 183 184 /// \brief Given a pattern, stores in *results the set of paths that matches 185 /// that pattern. *results is cleared. 186 /// 187 /// pattern must match all of a name, not just a substring. 188 /// 189 /// pattern: { term } 190 /// term: 191 /// '*': matches any sequence of non-'/' characters 192 /// '?': matches a single non-'/' character 193 /// '[' [ '^' ] { match-list } ']': 194 /// matches any single character (not) on the list 195 /// c: matches character c (c != '*', '?', '\\', '[') 196 /// '\\' c: matches character c 197 /// character-range: 198 /// c: matches character c (c != '\\', '-', ']') 199 /// '\\' c: matches character c 200 /// lo '-' hi: matches character c for lo <= c <= hi 201 /// 202 /// Typical return codes: 203 /// * OK - no errors 204 /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not 205 /// implemented GetMatchingPaths(const std::string & pattern,std::vector<string> * results)206 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 207 std::vector<string>* results) { 208 return GetMatchingPaths(pattern, nullptr, results); 209 } 210 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)211 virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, 212 TransactionToken* token, 213 std::vector<string>* results) { 214 return OkStatus(); 215 } 216 217 /// \brief Checks if the given filename matches the pattern. 218 /// 219 /// This function provides the equivalent of posix fnmatch, however it is 220 /// implemented without fnmatch to ensure that this can be used for cloud 221 /// filesystems on windows. For windows filesystems, it uses PathMatchSpec. 222 virtual bool Match(const std::string& filename, const std::string& pattern); 223 224 /// \brief Obtains statistics for the given path. Stat(const std::string & fname,FileStatistics * stat)225 virtual tensorflow::Status Stat(const std::string& fname, 226 FileStatistics* stat) { 227 return Stat(fname, nullptr, stat); 228 } 229 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)230 virtual tensorflow::Status Stat(const std::string& fname, 231 TransactionToken* token, 232 FileStatistics* stat) { 233 return OkStatus(); 234 } 235 236 /// \brief Deletes the named file. DeleteFile(const std::string & fname)237 virtual tensorflow::Status DeleteFile(const std::string& fname) { 238 return DeleteFile(fname, nullptr); 239 } 240 DeleteFile(const std::string & fname,TransactionToken * token)241 virtual tensorflow::Status DeleteFile(const std::string& fname, 242 TransactionToken* token) { 243 return OkStatus(); 244 } 245 246 /// \brief Creates the specified directory. 247 /// Typical return codes: 248 /// * OK - successfully created the directory. 249 /// * ALREADY_EXISTS - directory with name dirname already exists. 250 /// * PERMISSION_DENIED - dirname is not writable. CreateDir(const std::string & dirname)251 virtual tensorflow::Status CreateDir(const std::string& dirname) { 252 return CreateDir(dirname, nullptr); 253 } 254 CreateDir(const std::string & dirname,TransactionToken * token)255 virtual tensorflow::Status CreateDir(const std::string& dirname, 256 TransactionToken* token) { 257 return OkStatus(); 258 } 259 260 /// \brief Creates the specified directory and all the necessary 261 /// subdirectories. 262 /// Typical return codes: 263 /// * OK - successfully created the directory and sub directories, even if 264 /// they were already created. 265 /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. RecursivelyCreateDir(const std::string & dirname)266 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) { 267 return RecursivelyCreateDir(dirname, nullptr); 268 } 269 270 virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 271 TransactionToken* token); 272 273 /// \brief Deletes the specified directory. DeleteDir(const std::string & dirname)274 virtual tensorflow::Status DeleteDir(const std::string& dirname) { 275 return DeleteDir(dirname, nullptr); 276 } 277 DeleteDir(const std::string & dirname,TransactionToken * token)278 virtual tensorflow::Status DeleteDir(const std::string& dirname, 279 TransactionToken* token) { 280 return OkStatus(); 281 } 282 283 /// \brief Deletes the specified directory and all subdirectories and files 284 /// underneath it. This is accomplished by traversing the directory tree 285 /// rooted at dirname and deleting entries as they are encountered. 286 /// 287 /// If dirname itself is not readable or does not exist, *undeleted_dir_count 288 /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status 289 /// (e.g. NOT_FOUND) is returned. 290 /// 291 /// If dirname and all its descendants were successfully deleted, TF_OK is 292 /// returned and both error counters are set to zero. 293 /// 294 /// Otherwise, while traversing the tree, undeleted_file_count and 295 /// undeleted_dir_count are updated if an entry of the corresponding type 296 /// could not be deleted. The returned error status represents the reason that 297 /// any one of these entries could not be deleted. 298 /// 299 /// REQUIRES: undeleted_files, undeleted_dirs to be not null. 300 /// 301 /// Typical return codes: 302 /// * OK - dirname exists and we were able to delete everything underneath. 303 /// * NOT_FOUND - dirname doesn't exist 304 /// * PERMISSION_DENIED - dirname or some descendant is not writable 305 /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not 306 /// implemented DeleteRecursively(const std::string & dirname,int64_t * undeleted_files,int64_t * undeleted_dirs)307 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 308 int64_t* undeleted_files, 309 int64_t* undeleted_dirs) { 310 return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); 311 } 312 313 virtual tensorflow::Status DeleteRecursively(const std::string& dirname, 314 TransactionToken* token, 315 int64_t* undeleted_files, 316 int64_t* undeleted_dirs); 317 318 /// \brief Stores the size of `fname` in `*file_size`. GetFileSize(const std::string & fname,uint64 * file_size)319 virtual tensorflow::Status GetFileSize(const std::string& fname, 320 uint64* file_size) { 321 return GetFileSize(fname, nullptr, file_size); 322 } 323 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)324 virtual tensorflow::Status GetFileSize(const std::string& fname, 325 TransactionToken* token, 326 uint64* file_size) { 327 return OkStatus(); 328 } 329 330 /// \brief Overwrites the target if it exists. RenameFile(const std::string & src,const std::string & target)331 virtual tensorflow::Status RenameFile(const std::string& src, 332 const std::string& target) { 333 return RenameFile(src, target, nullptr); 334 } 335 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)336 virtual tensorflow::Status RenameFile(const std::string& src, 337 const std::string& target, 338 TransactionToken* token) { 339 return OkStatus(); 340 } 341 342 /// \brief Copy the src to target. CopyFile(const std::string & src,const std::string & target)343 virtual tensorflow::Status CopyFile(const std::string& src, 344 const std::string& target) { 345 return CopyFile(src, target, nullptr); 346 } 347 348 virtual tensorflow::Status CopyFile(const std::string& src, 349 const std::string& target, 350 TransactionToken* token); 351 352 /// \brief Translate an URI to a filename for the FileSystem implementation. 353 /// 354 /// The implementation in this class cleans up the path, removing 355 /// duplicate /'s, resolving .. and removing trailing '/'. 356 /// This respects relative vs. absolute paths, but does not 357 /// invoke any system calls (getcwd(2)) in order to resolve relative 358 /// paths with respect to the actual working directory. That is, this is 359 /// purely string manipulation, completely independent of process state. 360 virtual std::string TranslateName(const std::string& name) const; 361 362 /// \brief Returns whether the given path is a directory or not. 363 /// 364 /// Typical return codes (not guaranteed exhaustive): 365 /// * OK - The path exists and is a directory. 366 /// * FAILED_PRECONDITION - The path exists and is not a directory. 367 /// * NOT_FOUND - The path entry does not exist. 368 /// * PERMISSION_DENIED - Insufficient permissions. 369 /// * UNIMPLEMENTED - The file factory doesn't support directories. IsDirectory(const std::string & fname)370 virtual tensorflow::Status IsDirectory(const std::string& fname) { 371 return IsDirectory(fname, nullptr); 372 } 373 374 virtual tensorflow::Status IsDirectory(const std::string& fname, 375 TransactionToken* token); 376 377 /// \brief Returns whether the given path is on a file system 378 /// that has atomic move capabilities. This can be used 379 /// to determine if there needs to be a temp location to safely write objects. 380 /// The second boolean argument has_atomic_move contains this information. 381 /// 382 /// Returns one of the following status codes (not guaranteed exhaustive): 383 /// * OK - The path is on a recognized file system, 384 /// so has_atomic_move holds the above information. 385 /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in 386 /// TF 387 virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); 388 389 /// \brief Flushes any cached filesystem objects from memory. FlushCaches()390 virtual void FlushCaches() { FlushCaches(nullptr); } 391 392 virtual void FlushCaches(TransactionToken* token); 393 394 /// \brief The separator this filesystem uses. 395 /// 396 /// This is implemented as a part of the filesystem, because even on windows, 397 /// a user may need access to filesystems with '/' separators, such as cloud 398 /// filesystems. 399 virtual char Separator() const; 400 401 /// \brief Split a path to its basename and dirname. 402 /// 403 /// Helper function for Basename and Dirname. 404 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) const; 405 406 /// \brief returns the final file name in the given path. 407 /// 408 /// Returns the part of the path after the final "/". If there is no 409 /// "/" in the path, the result is the same as the input. 410 virtual StringPiece Basename(StringPiece path) const; 411 412 /// \brief Returns the part of the path before the final "/". 413 /// 414 /// If there is a single leading "/" in the path, the result will be the 415 /// leading "/". If there is no "/" in the path, the result is the empty 416 /// prefix of the input. 417 StringPiece Dirname(StringPiece path) const; 418 419 /// \brief Returns the part of the basename of path after the final ".". 420 /// 421 /// If there is no "." in the basename, the result is empty. 422 StringPiece Extension(StringPiece path) const; 423 424 /// \brief Clean duplicate and trailing, "/"s, and resolve ".." and ".". 425 /// 426 /// NOTE: This respects relative vs. absolute paths, but does not 427 /// invoke any system calls (getcwd(2)) in order to resolve relative 428 /// paths with respect to the actual working directory. That is, this is 429 /// purely string manipulation, completely independent of process state. 430 std::string CleanPath(StringPiece path) const; 431 432 /// \brief Creates a URI from a scheme, host, and path. 433 /// 434 /// If the scheme is empty, we just return the path. 435 std::string CreateURI(StringPiece scheme, StringPiece host, 436 StringPiece path) const; 437 438 /// \brief Creates a temporary file name with an extension. 439 std::string GetTempFilename(const std::string& extension) const; 440 441 /// \brief Return true if path is absolute. 442 bool IsAbsolutePath(tensorflow::StringPiece path) const; 443 444 #ifndef SWIG // variadic templates 445 /// \brief Join multiple paths together. 446 /// 447 /// This function also removes the unnecessary path separators. 448 /// For example: 449 /// 450 /// Arguments | JoinPath 451 /// ---------------------------+---------- 452 /// '/foo', 'bar' | /foo/bar 453 /// '/foo/', 'bar' | /foo/bar 454 /// '/foo', '/bar' | /foo/bar 455 /// 456 /// Usage: 457 /// string path = io::JoinPath("/mydir", filename); 458 /// string path = io::JoinPath(FLAGS_test_srcdir, filename); 459 /// string path = io::JoinPath("/full", "path", "to", "filename"); 460 template <typename... T> JoinPath(const T &...args)461 std::string JoinPath(const T&... args) { 462 return JoinPathImpl({args...}); 463 } 464 #endif /* SWIG */ 465 466 std::string JoinPathImpl( 467 std::initializer_list<tensorflow::StringPiece> paths); 468 469 /// \brief Populates the scheme, host, and path from a URI. 470 /// 471 /// scheme, host, and path are guaranteed by this function to point into the 472 /// contents of uri, even if empty. 473 /// 474 /// Corner cases: 475 /// - If the URI is invalid, scheme and host are set to empty strings and the 476 /// passed string is assumed to be a path 477 /// - If the URI omits the path (e.g. file://host), then the path is left 478 /// empty. 479 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, 480 StringPiece* path) const; 481 482 // Transaction related API 483 484 /// \brief Starts a new transaction StartTransaction(TransactionToken ** token)485 virtual tensorflow::Status StartTransaction(TransactionToken** token) { 486 *token = nullptr; 487 return OkStatus(); 488 } 489 490 /// \brief Adds `path` to transaction in `token` AddToTransaction(const std::string & path,TransactionToken * token)491 virtual tensorflow::Status AddToTransaction(const std::string& path, 492 TransactionToken* token) { 493 return OkStatus(); 494 } 495 496 /// \brief Ends transaction EndTransaction(TransactionToken * token)497 virtual tensorflow::Status EndTransaction(TransactionToken* token) { 498 return OkStatus(); 499 } 500 501 /// \brief Get token for `path` or start a new transaction and add `path` to 502 /// it. GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)503 virtual tensorflow::Status GetTokenOrStartTransaction( 504 const std::string& path, TransactionToken** token) { 505 *token = nullptr; 506 return OkStatus(); 507 } 508 509 /// \brief Return transaction for `path` or nullptr in `token` GetTransactionForPath(const std::string & path,TransactionToken ** token)510 virtual tensorflow::Status GetTransactionForPath(const std::string& path, 511 TransactionToken** token) { 512 *token = nullptr; 513 return OkStatus(); 514 } 515 516 /// \brief Decode transaction to human readable string. 517 virtual std::string DecodeTransaction(const TransactionToken* token); 518 519 /// \brief Set File System Configuration Options SetOption(const string & key,const string & value)520 virtual Status SetOption(const string& key, const string& value) { 521 return errors::Unimplemented("SetOption"); 522 } 523 524 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<string> & values)525 virtual tensorflow::Status SetOption(const std::string& name, 526 const std::vector<string>& values) { 527 return errors::Unimplemented("SetOption"); 528 } 529 530 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<int64_t> & values)531 virtual tensorflow::Status SetOption(const std::string& name, 532 const std::vector<int64_t>& values) { 533 return errors::Unimplemented("SetOption"); 534 } 535 536 /// \brief Set File System Configuration Option SetOption(const std::string & name,const std::vector<double> & values)537 virtual tensorflow::Status SetOption(const std::string& name, 538 const std::vector<double>& values) { 539 return errors::Unimplemented("SetOption"); 540 } 541 FileSystem()542 FileSystem() {} 543 544 virtual ~FileSystem() = default; 545 }; 546 /// This macro adds forwarding methods from FileSystem class to 547 /// used class since name hiding will prevent these to be accessed from 548 /// derived classes and would require all use locations to migrate to 549 /// Transactional API. This is an interim solution until ModularFileSystem class 550 /// becomes a singleton. 551 // TODO(sami): Remove this macro when filesystem plugins migration is complete. 552 #define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ 553 using FileSystem::NewRandomAccessFile; \ 554 using FileSystem::NewWritableFile; \ 555 using FileSystem::NewAppendableFile; \ 556 using FileSystem::NewReadOnlyMemoryRegionFromFile; \ 557 using FileSystem::FileExists; \ 558 using FileSystem::GetChildren; \ 559 using FileSystem::GetMatchingPaths; \ 560 using FileSystem::Stat; \ 561 using FileSystem::DeleteFile; \ 562 using FileSystem::RecursivelyCreateDir; \ 563 using FileSystem::DeleteDir; \ 564 using FileSystem::DeleteRecursively; \ 565 using FileSystem::GetFileSize; \ 566 using FileSystem::RenameFile; \ 567 using FileSystem::CopyFile; \ 568 using FileSystem::IsDirectory; \ 569 using FileSystem::FlushCaches 570 571 /// A Wrapper class for Transactional FileSystem support. 572 /// This provides means to make use of the transactions with minimal code change 573 /// Any operations that are done through this interface will be through the 574 /// transaction created at the time of construction of this instance. 575 /// See FileSystem documentation for method descriptions. 576 /// This class simply forwards all calls to wrapped filesystem either with given 577 /// transaction token or with token used in its construction. This allows doing 578 /// transactional filesystem access with minimal code change. 579 class WrappedFileSystem : public FileSystem { 580 public: 581 TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; 582 NewRandomAccessFile(const std::string & fname,TransactionToken * token,std::unique_ptr<RandomAccessFile> * result)583 tensorflow::Status NewRandomAccessFile( 584 const std::string& fname, TransactionToken* token, 585 std::unique_ptr<RandomAccessFile>* result) override { 586 return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); 587 } 588 NewWritableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)589 tensorflow::Status NewWritableFile( 590 const std::string& fname, TransactionToken* token, 591 std::unique_ptr<WritableFile>* result) override { 592 return fs_->NewWritableFile(fname, (token ? token : token_), result); 593 } 594 NewAppendableFile(const std::string & fname,TransactionToken * token,std::unique_ptr<WritableFile> * result)595 tensorflow::Status NewAppendableFile( 596 const std::string& fname, TransactionToken* token, 597 std::unique_ptr<WritableFile>* result) override { 598 return fs_->NewAppendableFile(fname, (token ? token : token_), result); 599 } 600 NewReadOnlyMemoryRegionFromFile(const std::string & fname,TransactionToken * token,std::unique_ptr<ReadOnlyMemoryRegion> * result)601 tensorflow::Status NewReadOnlyMemoryRegionFromFile( 602 const std::string& fname, TransactionToken* token, 603 std::unique_ptr<ReadOnlyMemoryRegion>* result) override { 604 return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), 605 result); 606 } 607 FileExists(const std::string & fname,TransactionToken * token)608 tensorflow::Status FileExists(const std::string& fname, 609 TransactionToken* token) override { 610 return fs_->FileExists(fname, (token ? token : token_)); 611 } 612 FilesExist(const std::vector<string> & files,TransactionToken * token,std::vector<Status> * status)613 bool FilesExist(const std::vector<string>& files, TransactionToken* token, 614 std::vector<Status>* status) override { 615 return fs_->FilesExist(files, (token ? token : token_), status); 616 } 617 GetChildren(const std::string & dir,TransactionToken * token,std::vector<string> * result)618 tensorflow::Status GetChildren(const std::string& dir, 619 TransactionToken* token, 620 std::vector<string>* result) override { 621 return fs_->GetChildren(dir, (token ? token : token_), result); 622 } 623 GetMatchingPaths(const std::string & pattern,TransactionToken * token,std::vector<string> * results)624 tensorflow::Status GetMatchingPaths(const std::string& pattern, 625 TransactionToken* token, 626 std::vector<string>* results) override { 627 return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); 628 } 629 Match(const std::string & filename,const std::string & pattern)630 bool Match(const std::string& filename, const std::string& pattern) override { 631 return fs_->Match(filename, pattern); 632 } 633 Stat(const std::string & fname,TransactionToken * token,FileStatistics * stat)634 tensorflow::Status Stat(const std::string& fname, TransactionToken* token, 635 FileStatistics* stat) override { 636 return fs_->Stat(fname, (token ? token : token_), stat); 637 } 638 DeleteFile(const std::string & fname,TransactionToken * token)639 tensorflow::Status DeleteFile(const std::string& fname, 640 TransactionToken* token) override { 641 return fs_->DeleteFile(fname, (token ? token : token_)); 642 } 643 CreateDir(const std::string & dirname,TransactionToken * token)644 tensorflow::Status CreateDir(const std::string& dirname, 645 TransactionToken* token) override { 646 return fs_->CreateDir(dirname, (token ? token : token_)); 647 } 648 RecursivelyCreateDir(const std::string & dirname,TransactionToken * token)649 tensorflow::Status RecursivelyCreateDir(const std::string& dirname, 650 TransactionToken* token) override { 651 return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); 652 } 653 DeleteDir(const std::string & dirname,TransactionToken * token)654 tensorflow::Status DeleteDir(const std::string& dirname, 655 TransactionToken* token) override { 656 return fs_->DeleteDir(dirname, (token ? token : token_)); 657 } 658 DeleteRecursively(const std::string & dirname,TransactionToken * token,int64_t * undeleted_files,int64_t * undeleted_dirs)659 tensorflow::Status DeleteRecursively(const std::string& dirname, 660 TransactionToken* token, 661 int64_t* undeleted_files, 662 int64_t* undeleted_dirs) override { 663 return fs_->DeleteRecursively(dirname, (token ? token : token_), 664 undeleted_files, undeleted_dirs); 665 } 666 GetFileSize(const std::string & fname,TransactionToken * token,uint64 * file_size)667 tensorflow::Status GetFileSize(const std::string& fname, 668 TransactionToken* token, 669 uint64* file_size) override { 670 return fs_->GetFileSize(fname, (token ? token : token_), file_size); 671 } 672 RenameFile(const std::string & src,const std::string & target,TransactionToken * token)673 tensorflow::Status RenameFile(const std::string& src, 674 const std::string& target, 675 TransactionToken* token) override { 676 return fs_->RenameFile(src, target, (token ? token : token_)); 677 } 678 CopyFile(const std::string & src,const std::string & target,TransactionToken * token)679 tensorflow::Status CopyFile(const std::string& src, const std::string& target, 680 TransactionToken* token) override { 681 return fs_->CopyFile(src, target, (token ? token : token_)); 682 } 683 TranslateName(const std::string & name)684 std::string TranslateName(const std::string& name) const override { 685 return fs_->TranslateName(name); 686 } 687 IsDirectory(const std::string & fname,TransactionToken * token)688 tensorflow::Status IsDirectory(const std::string& fname, 689 TransactionToken* token) override { 690 return fs_->IsDirectory(fname, (token ? token : token_)); 691 } 692 HasAtomicMove(const std::string & path,bool * has_atomic_move)693 Status HasAtomicMove(const std::string& path, 694 bool* has_atomic_move) override { 695 return fs_->HasAtomicMove(path, has_atomic_move); 696 } 697 FlushCaches(TransactionToken * token)698 void FlushCaches(TransactionToken* token) override { 699 return fs_->FlushCaches((token ? token : token_)); 700 } 701 Separator()702 char Separator() const override { return fs_->Separator(); } 703 Basename(StringPiece path)704 StringPiece Basename(StringPiece path) const override { 705 return fs_->Basename(path); 706 } 707 StartTransaction(TransactionToken ** token)708 tensorflow::Status StartTransaction(TransactionToken** token) override { 709 return fs_->StartTransaction(token); 710 } 711 AddToTransaction(const std::string & path,TransactionToken * token)712 tensorflow::Status AddToTransaction(const std::string& path, 713 TransactionToken* token) override { 714 return fs_->AddToTransaction(path, (token ? token : token_)); 715 } 716 EndTransaction(TransactionToken * token)717 tensorflow::Status EndTransaction(TransactionToken* token) override { 718 return fs_->EndTransaction(token); 719 } 720 GetTransactionForPath(const std::string & path,TransactionToken ** token)721 tensorflow::Status GetTransactionForPath(const std::string& path, 722 TransactionToken** token) override { 723 return fs_->GetTransactionForPath(path, token); 724 } 725 GetTokenOrStartTransaction(const std::string & path,TransactionToken ** token)726 tensorflow::Status GetTokenOrStartTransaction( 727 const std::string& path, TransactionToken** token) override { 728 return fs_->GetTokenOrStartTransaction(path, token); 729 } 730 DecodeTransaction(const TransactionToken * token)731 std::string DecodeTransaction(const TransactionToken* token) override { 732 return fs_->DecodeTransaction((token ? token : token_)); 733 } 734 WrappedFileSystem(FileSystem * file_system,TransactionToken * token)735 WrappedFileSystem(FileSystem* file_system, TransactionToken* token) 736 : fs_(file_system), token_(token) {} 737 738 ~WrappedFileSystem() override = default; 739 740 private: 741 FileSystem* fs_; 742 TransactionToken* token_; 743 }; 744 745 /// A file abstraction for randomly reading the contents of a file. 746 class RandomAccessFile { 747 public: RandomAccessFile()748 RandomAccessFile() {} 749 virtual ~RandomAccessFile() = default; 750 751 /// \brief Returns the name of the file. 752 /// 753 /// This is an optional operation that may not be implemented by every 754 /// filesystem. Name(StringPiece * result)755 virtual tensorflow::Status Name(StringPiece* result) const { 756 return errors::Unimplemented("This filesystem does not support Name()"); 757 } 758 759 /// \brief Reads up to `n` bytes from the file starting at `offset`. 760 /// 761 /// `scratch[0..n-1]` may be written by this routine. Sets `*result` 762 /// to the data that was read (including if fewer than `n` bytes were 763 /// successfully read). May set `*result` to point at data in 764 /// `scratch[0..n-1]`, so `scratch[0..n-1]` must be live when 765 /// `*result` is used. 766 /// 767 /// On OK returned status: `n` bytes have been stored in `*result`. 768 /// On non-OK returned status: `[0..n]` bytes have been stored in `*result`. 769 /// 770 /// Returns `OUT_OF_RANGE` if fewer than n bytes were stored in `*result` 771 /// because of EOF. 772 /// 773 /// Safe for concurrent use by multiple threads. 774 virtual tensorflow::Status Read(uint64 offset, size_t n, StringPiece* result, 775 char* scratch) const = 0; 776 777 #if defined(TF_CORD_SUPPORT) 778 /// \brief Read up to `n` bytes from the file starting at `offset`. Read(uint64 offset,size_t n,absl::Cord * cord)779 virtual tensorflow::Status Read(uint64 offset, size_t n, 780 absl::Cord* cord) const { 781 return errors::Unimplemented( 782 "Read(uint64, size_t, absl::Cord*) is not " 783 "implemented"); 784 } 785 #endif 786 787 private: 788 TF_DISALLOW_COPY_AND_ASSIGN(RandomAccessFile); 789 }; 790 791 /// \brief A file abstraction for sequential writing. 792 /// 793 /// The implementation must provide buffering since callers may append 794 /// small fragments at a time to the file. 795 class WritableFile { 796 public: WritableFile()797 WritableFile() {} 798 virtual ~WritableFile() = default; 799 800 /// \brief Append 'data' to the file. 801 virtual tensorflow::Status Append(StringPiece data) = 0; 802 803 #if defined(TF_CORD_SUPPORT) 804 // \brief Append 'data' to the file. Append(const absl::Cord & cord)805 virtual tensorflow::Status Append(const absl::Cord& cord) { 806 for (StringPiece chunk : cord.Chunks()) { 807 TF_RETURN_IF_ERROR(Append(chunk)); 808 } 809 return OkStatus(); 810 } 811 #endif 812 813 /// \brief Close the file. 814 /// 815 /// Flush() and de-allocate resources associated with this file 816 /// 817 /// Typical return codes (not guaranteed to be exhaustive): 818 /// * OK 819 /// * Other codes, as returned from Flush() 820 virtual tensorflow::Status Close() = 0; 821 822 /// \brief Flushes the file and optionally syncs contents to filesystem. 823 /// 824 /// This should flush any local buffers whose contents have not been 825 /// delivered to the filesystem. 826 /// 827 /// If the process terminates after a successful flush, the contents 828 /// may still be persisted, since the underlying filesystem may 829 /// eventually flush the contents. If the OS or machine crashes 830 /// after a successful flush, the contents may or may not be 831 /// persisted, depending on the implementation. 832 virtual tensorflow::Status Flush() = 0; 833 834 // \brief Returns the name of the file. 835 /// 836 /// This is an optional operation that may not be implemented by every 837 /// filesystem. Name(StringPiece * result)838 virtual tensorflow::Status Name(StringPiece* result) const { 839 return errors::Unimplemented("This filesystem does not support Name()"); 840 } 841 842 /// \brief Syncs contents of file to filesystem. 843 /// 844 /// This waits for confirmation from the filesystem that the contents 845 /// of the file have been persisted to the filesystem; if the OS 846 /// or machine crashes after a successful Sync, the contents should 847 /// be properly saved. 848 virtual tensorflow::Status Sync() = 0; 849 850 /// \brief Retrieves the current write position in the file, or -1 on 851 /// error. 852 /// 853 /// This is an optional operation, subclasses may choose to return 854 /// errors::Unimplemented. Tell(int64_t * position)855 virtual tensorflow::Status Tell(int64_t* position) { 856 *position = -1; 857 return errors::Unimplemented("This filesystem does not support Tell()"); 858 } 859 860 private: 861 TF_DISALLOW_COPY_AND_ASSIGN(WritableFile); 862 }; 863 864 /// \brief A readonly memmapped file abstraction. 865 /// 866 /// The implementation must guarantee that all memory is accessible when the 867 /// object exists, independently from the Env that created it. 868 class ReadOnlyMemoryRegion { 869 public: ReadOnlyMemoryRegion()870 ReadOnlyMemoryRegion() {} 871 virtual ~ReadOnlyMemoryRegion() = default; 872 873 /// \brief Returns a pointer to the memory region. 874 virtual const void* data() = 0; 875 876 /// \brief Returns the length of the memory region in bytes. 877 virtual uint64 length() = 0; 878 }; 879 880 /// \brief A registry for file system implementations. 881 /// 882 /// Filenames are specified as an URI, which is of the form 883 /// [scheme://]<filename>. 884 /// File system implementations are registered using the REGISTER_FILE_SYSTEM 885 /// macro, providing the 'scheme' as the key. 886 /// 887 /// There are two `Register` methods: one using `Factory` for legacy filesystems 888 /// (deprecated mechanism of subclassing `FileSystem` and using 889 /// `REGISTER_FILE_SYSTEM` macro), and one using `std::unique_ptr<FileSystem>` 890 /// for the new modular approach. 891 /// 892 /// Note that the new API expects a pointer to `ModularFileSystem` but this is 893 /// not checked as there should be exactly one caller to the API and doing the 894 /// check results in a circular dependency between `BUILD` targets. 895 /// 896 /// Plan is to completely remove the filesystem registration from `Env` and 897 /// incorporate it into `ModularFileSystem` class (which will be renamed to be 898 /// the only `FileSystem` class and marked as `final`). But this will happen at 899 /// a later time, after we convert all filesystems to the new API. 900 /// 901 /// TODO(b/139060984): After all filesystems are converted, remove old 902 /// registration and update comment. 903 class FileSystemRegistry { 904 public: 905 typedef std::function<FileSystem*()> Factory; 906 907 virtual ~FileSystemRegistry() = default; 908 virtual tensorflow::Status Register(const std::string& scheme, 909 Factory factory) = 0; 910 virtual tensorflow::Status Register( 911 const std::string& scheme, std::unique_ptr<FileSystem> filesystem) = 0; 912 virtual FileSystem* Lookup(const std::string& scheme) = 0; 913 virtual tensorflow::Status GetRegisteredFileSystemSchemes( 914 std::vector<std::string>* schemes) = 0; 915 }; 916 917 } // namespace tensorflow 918 919 #endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ 920