• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_
17 #define TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_
18 
19 #include <string>
20 
21 #include "tensorflow/core/lib/core/status.h"
22 #include "tensorflow/core/lib/io/inputstream_interface.h"
23 #include "tensorflow/core/lib/io/zlib_compression_options.h"
24 #include "tensorflow/core/platform/env.h"
25 #include "tensorflow/core/platform/macros.h"
26 #include "tensorflow/core/platform/types.h"
27 
28 namespace tensorflow {
29 namespace io {
30 
31 // Forward declaration; zlib.h is only included in the .cc file, which is
32 // presumably where ZStreamDef is defined (TODO confirm).
33 struct ZStreamDef;
34 
35 // A ZlibInputStream provides support for reading from a stream compressed
36 // using zlib (http://www.zlib.net/). Buffers the contents of the file.
37 //
38 // A given instance of a ZlibInputStream is NOT safe for concurrent use
39 // by multiple threads.
40 class ZlibInputStream : public InputStreamInterface {
41  public:
42   // Create a ZlibInputStream for `input_stream` with a buffer of size
43   // `input_buffer_bytes` bytes for reading contents from `input_stream` and
44   // another buffer with size `output_buffer_bytes` for caching decompressed
45   // contents.
46   //
47   // Takes ownership of `input_stream` iff `owns_input_stream` is true.
48   ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes,
49                   size_t output_buffer_bytes,
50                   const ZlibCompressionOptions& zlib_options,
51                   bool owns_input_stream);
52 
53   // Equivalent to the previous constructor with owns_input_stream=false.
54   ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes,
55                   size_t output_buffer_bytes,
56                   const ZlibCompressionOptions& zlib_options);
57 
58   ~ZlibInputStream();
59 
60   // Reads bytes_to_read bytes into *result, overwriting *result.
61   //
62   // Return Status codes:
63   // OK:           If successful.
64   // OUT_OF_RANGE: If there are not enough bytes to read before
65   //               the end of the stream.
66   // ABORTED:      If inflate() fails, we return the error code with the
67   //               error message in `z_stream_->msg`.
68   // others:       If reading from stream failed.
69   Status ReadNBytes(int64 bytes_to_read, tstring* result) override;
70 
71 #if defined(TF_CORD_SUPPORT)
72   Status ReadNBytes(int64 bytes_to_read, absl::Cord* result) override;
73 #endif  // defined(TF_CORD_SUPPORT)
74 
75   int64 Tell() const override;  // Presumably bytes_read_ -- TODO confirm.
76 
77   Status Reset() override;
78 
79  private:
80   void InitZlibBuffer();
81 
82   const bool owns_input_stream_;        // Whether we own input_stream_.
83   InputStreamInterface* input_stream_;  // Source of compressed data.
84   size_t input_buffer_capacity_;   // Size of z_stream_input_
85   size_t output_buffer_capacity_;  // Size of z_stream_output_
86   char* next_unread_byte_;         // Next unread byte in z_stream_output_
87   bool init_error_ = false;        // Whether we encountered an error in init.
88 
89   ZlibCompressionOptions const zlib_options_;
90   // zlib state; the z_stream_* names in comments here presumably refer to it.
91   std::unique_ptr<ZStreamDef> z_stream_def_;
92 
93   // Reads data from `input_stream_` and tries to fill up `z_stream_input_` if
94   // enough unread data is left in `input_stream_`.
95   //
96   // Looks up z_stream_->next_in to check how much data in z_stream_input_
97   // has already been read. The used data is removed and new data is appended
98   // after any unread data in z_stream_input_.
99   // After this call z_stream_->next_in points to the start of z_stream_input_
100   // and z_stream_->avail_in stores the number of readable bytes in
101   // z_stream_input_.
102   //
103   // Returns OutOfRange error if NO data could be read from stream. Note that
104   // this won't return an OutOfRange if there wasn't sufficient data in stream
105   // to completely fill up z_stream_input_.
106   Status ReadFromStream();
107 
108   // Calls `inflate()` and returns DataLoss Status if it failed.
109   Status Inflate();  // NOTE(review): ReadNBytes doc says ABORTED -- confirm.
110 
111   // Starts reading bytes at `next_unread_byte_` till either `bytes_to_read`
112   // bytes have been read or `z_stream_->next_out` is reached.
113   // Returns the number of bytes read and advances the `next_unread_byte_`
114   // pointer to the next location to read from.
115   size_t ReadBytesFromCache(size_t bytes_to_read, tstring* result);
116 
117   // The number of unread bytes in z_stream_output_.
118   //
119   // z_stream_output_  -->
120   //
121   // [RRRRRRRRRRRRRRRRRRUUUUUUUUUUUUUU000000000000000000]
122   //                    ^             ^
123   //           next_unread_byte_    z_stream_->next_out
124   //
125   // R: Read bytes
126   // U: Unread bytes
127   // 0: garbage bytes where new output will be written
128   //
129   // Returns the size of [next_unread_byte_, z_stream_->next_out)
130   size_t NumUnreadBytes() const;
131 
132   // Number of *uncompressed* bytes that have been read from this stream.
133   int64 bytes_read_;
134 
135   TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream);
136 };
137 
138 }  // namespace io
139 }  // namespace tensorflow
140 
141 #endif  // TENSORFLOW_CORE_LIB_IO_ZLIB_INPUTSTREAM_H_
142