1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_ 17 #define TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_ 18 19 #include <string> 20 #include "tensorflow/core/lib/core/status.h" 21 #include "tensorflow/core/lib/io/inputstream_interface.h" 22 #include "tensorflow/core/platform/env.h" 23 #include "tensorflow/core/platform/macros.h" 24 #include "tensorflow/core/platform/snappy.h" 25 #include "tensorflow/core/platform/types.h" 26 27 namespace tensorflow { 28 namespace io { 29 30 // An SnappyInputBuffer provides support for reading from a file compressed 31 // using snappy (https://github.com/google/snappy). 32 // 33 // A given instance of an SnappyInputBuffer is NOT safe for concurrent use 34 // by multiple threads 35 class SnappyInputBuffer : public InputStreamInterface { 36 public: 37 // Create a SnappyInputBuffer for `file` with a buffer of size 38 // `input_buffer_bytes` bytes for reading contents from `file` and another 39 // buffer with size `output_buffer_bytes` for caching decompressed contents. 40 // Does *not* take ownership of "file". 41 SnappyInputBuffer(RandomAccessFile* file, size_t input_buffer_bytes, 42 size_t output_buffer_bytes); 43 44 // Reads bytes_to_read bytes into *result, overwriting *result. 45 // 46 // Return Status codes: 47 // OK: 48 // If successful. 49 // OUT_OF_RANGE: 50 // If there are not enough bytes to read before the end of the file. 51 // DATA_LOSS: 52 // If uncompression failed or if the file is corrupted. 53 // RESOURCE_EXHAUSTED: 54 // If input_buffer_ is smaller in size than a compressed block. 55 // others: 56 // If reading from file failed. 57 Status ReadNBytes(int64 bytes_to_read, tstring* result) override; 58 59 int64 Tell() const override; 60 61 Status Reset() override; 62 63 private: 64 // Reads data from `file_` and tries to fill up `input_buffer_` if enough 65 // unread data is left in `file_`. 66 // 67 // Looks up `next_in_` to check how much data in `input_buffer_` 68 // has already been read. The used data is removed and new data is added to 69 // after any unread data in `input_buffer_`. 70 // After this call `next_in` points to the start of `input_buffer_` 71 // and `avail_in_` stores the number of readable bytes in 72 // `input_buffer_`. 73 // 74 // Returns OutOfRange error if NO data could be read from file. Note that this 75 // won't return an OutOfRange if there wasn't sufficient data in file to 76 // completely fill up `input_buffer_`. 77 Status ReadFromFile(); 78 79 // Reads the length of the next compressed block stored in the next 4 bytes at 80 // `next_in_`. Uncompresses the next compressed block and writes the output 81 // produced to the output_buffer_. 82 // Should be called only after the cached output has been consumed. 83 Status Inflate(); 84 85 // Starts reading bytes at `next_out_` until either `bytes_to_read` 86 // bytes have been read or `next_out_` is reached. 87 // Returns the number of bytes read and advances the `next_out_` 88 // pointer to the next location to read from. 89 size_t ReadBytesFromCache(size_t bytes_to_read, char* result); 90 91 // Reads the length of the next *compressed* block and stores in `length`. 92 // The length is stored in 4 bytes in little endian notation. 93 Status ReadCompressedBlockLength(uint32* length); 94 95 RandomAccessFile* file_; // Not owned 96 int64 file_pos_ = 0; // Next position to read from in `file_` 97 size_t input_buffer_capacity_; // Size of `input_buffer_`. 98 // Must be at least as big as the size of 99 // the largest compressed block. 100 size_t output_buffer_capacity_; // Size of `output_buffer_` 101 102 // Buffer for storing contents read from compressed file. 103 // TODO(srbs): Consider using circular buffers. That would greatly simplify 104 // the implementation. 105 std::unique_ptr<char[]> input_buffer_; 106 107 // Buffer for storing inflated contents of `file_`. 108 std::unique_ptr<char[]> output_buffer_; 109 110 // Next unread byte in `input_buffer_`. 111 char* next_in_; 112 113 // Next unread byte in `output_buffer_` 114 char* next_out_; 115 116 // Number of unread bytes available at `next_in_` in `input_buffer_`. 117 size_t avail_in_ = 0; 118 119 // Number of unread bytes available at `next_out_` in `output_buffer_`. 120 size_t avail_out_ = 0; 121 122 // Number of *uncompressed* bytes that have been read from this stream. 123 int64 bytes_read_; 124 125 TF_DISALLOW_COPY_AND_ASSIGN(SnappyInputBuffer); 126 }; 127 128 } // namespace io 129 } // namespace tensorflow 130 131 #endif // TENSORFLOW_CORE_LIB_IO_SNAPPY_SNAPPY_INPUTBUFFER_H_ 132