1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_ 17 #define TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_ 18 19 #include <string> 20 21 #include "tensorflow/core/lib/core/status.h" 22 #include "tensorflow/core/lib/io/inputstream_interface.h" 23 #include "tensorflow/core/lib/io/zlib_compression_options.h" 24 #include "tensorflow/core/platform/env.h" 25 #include "tensorflow/core/platform/macros.h" 26 #include "tensorflow/core/platform/types.h" 27 28 namespace tensorflow { 29 namespace io { 30 31 // Forward declare some members of zlib.h, which is only included in the 32 // .cc file. 33 struct ZStreamDef; 34 35 // An ZlibInputStream provides support for reading from a stream compressed 36 // using zlib (http://www.zlib.net/). Buffers the contents of the file. 37 // 38 // A given instance of an ZlibInputStream is NOT safe for concurrent use 39 // by multiple threads 40 class ZlibInputStream : public InputStreamInterface { 41 public: 42 // Create a ZlibInputStream for `input_stream` with a buffer of size 43 // `input_buffer_bytes` bytes for reading contents from `input_stream` and 44 // another buffer with size `output_buffer_bytes` for caching decompressed 45 // contents. 46 // 47 // Takes ownership of `input_stream` iff `owns_input_stream` is true. 48 ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes, 49 size_t output_buffer_bytes, 50 const ZlibCompressionOptions& zlib_options, 51 bool owns_input_stream); 52 53 // Equivalent to the previous constructor with owns_input_stream=false. 54 ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes, 55 size_t output_buffer_bytes, 56 const ZlibCompressionOptions& zlib_options); 57 58 ~ZlibInputStream(); 59 60 // Reads bytes_to_read bytes into *result, overwriting *result. 61 // 62 // Return Status codes: 63 // OK: If successful. 64 // OUT_OF_RANGE: If there are not enough bytes to read before 65 // the end of the stream. 66 // ABORTED: If inflate() fails, we return the error code with the 67 // error message in `z_stream_->msg`. 68 // others: If reading from stream failed. 69 Status ReadNBytes(int64 bytes_to_read, tstring* result) override; 70 71 #if defined(TF_CORD_SUPPORT) 72 Status ReadNBytes(int64 bytes_to_read, absl::Cord* result) override; 73 #endif 74 75 int64 Tell() const override; 76 77 Status Reset() override; 78 79 private: 80 void InitZlibBuffer(); 81 82 const bool owns_input_stream_; 83 InputStreamInterface* input_stream_; 84 size_t input_buffer_capacity_; // Size of z_stream_input_ 85 size_t output_buffer_capacity_; // Size of z_stream_output_ 86 char* next_unread_byte_; // Next unread byte in z_stream_output_ 87 bool init_error_ = false; // Whether we encountered an error in init. 88 89 ZlibCompressionOptions const zlib_options_; 90 91 std::unique_ptr<ZStreamDef> z_stream_def_; 92 93 // Reads data from `input_stream_` and tries to fill up `z_stream_input_` if 94 // enough unread data is left in `input_stream_`. 95 // 96 // Looks up z_stream_->next_in to check how much data in z_stream_input_ 97 // has already been read. The used data is removed and new data is added to 98 // after any unread data in z_stream_input_. 99 // After this call z_stream_->next_in points to the start of z_stream_input_ 100 // and z_stream_->avail_in stores the number of readable bytes in 101 // z_stream_input_. 102 // 103 // Returns OutOfRange error if NO data could be read from stream. Note that 104 // this won't return an OutOfRange if there wasn't sufficient data in stream 105 // to completely fill up z_stream_input_. 106 Status ReadFromStream(); 107 108 // Calls `inflate()` and returns DataLoss Status if it failed. 109 Status Inflate(); 110 111 // Starts reading bytes at `next_unread_byte_` till either `bytes_to_read` 112 // bytes have been read or `z_stream_->next_out` is reached. 113 // Returns the number of bytes read and advances the `next_unread_byte_` 114 // pointer to the next location to read from. 115 size_t ReadBytesFromCache(size_t bytes_to_read, tstring* result); 116 117 // The number of unread bytes in z_stream_output_. 118 // 119 // z_stream_output_ --> 120 // 121 // [RRRRRRRRRRRRRRRRRRUUUUUUUUUUUUUU000000000000000000] 122 // ^ ^ 123 // next_unread_byte_ z_stream_->next_out 124 // 125 // R: Read bytes 126 // U: Unread bytes 127 // 0: garbage bytes where new output will be written 128 // 129 // Returns the size of [next_unread_byte_, z_stream_->next_out) 130 size_t NumUnreadBytes() const; 131 132 // Number of *uncompressed* bytes that have been read from this stream. 133 int64 bytes_read_; 134 135 TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream); 136 }; 137 138 } // namespace io 139 } // namespace tensorflow 140 141 #endif // TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_ 142