1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/gzip_source_stream.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <utility>
10 #include <zlib.h>
11
12 #include "base/bit_cast.h"
13 #include "base/check_op.h"
14 #include "base/functional/bind.h"
15 #include "base/memory/ptr_util.h"
16 #include "base/memory/ref_counted.h"
17 #include "base/notreached.h"
18 #include "base/numerics/checked_math.h"
19 #include "net/base/io_buffer.h"
20
21 namespace net {
22
namespace {

// Human-readable names returned by GzipSourceStream::GetTypeAsString().
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";

// For deflate streams, if at least this many bytes have been received without
// an error and without having to add a Zlib header, assume the original stream
// had a Zlib header. In practice, nowhere near this much data is needed, but
// since the detection logic is a heuristic, it is best to be safe. Data is
// freed once it has been determined whether the stream has a Zlib header or
// not, so larger values shouldn't affect memory usage in practice.
//
// Declared as size_t because it is only ever compared against byte counts,
// which are size_t; this avoids a signed/unsigned comparison.
constexpr size_t kMaxZlibHeaderSniffBytes = 1000;

}  // namespace
37
~GzipSourceStream()38 GzipSourceStream::~GzipSourceStream() {
39 if (zlib_stream_)
40 inflateEnd(zlib_stream_.get());
41 }
42
Create(std::unique_ptr<SourceStream> upstream,SourceStream::SourceType type)43 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
44 std::unique_ptr<SourceStream> upstream,
45 SourceStream::SourceType type) {
46 DCHECK(type == TYPE_GZIP || type == TYPE_DEFLATE);
47 auto source =
48 base::WrapUnique(new GzipSourceStream(std::move(upstream), type));
49
50 if (!source->Init())
51 return nullptr;
52 return source;
53 }
54
GzipSourceStream(std::unique_ptr<SourceStream> upstream,SourceStream::SourceType type)55 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,
56 SourceStream::SourceType type)
57 : FilterSourceStream(type, std::move(upstream)) {}
58
Init()59 bool GzipSourceStream::Init() {
60 zlib_stream_ = std::make_unique<z_stream>();
61 if (!zlib_stream_)
62 return false;
63 memset(zlib_stream_.get(), 0, sizeof(z_stream));
64
65 int ret;
66 if (type() == TYPE_GZIP) {
67 ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);
68 } else {
69 ret = inflateInit(zlib_stream_.get());
70 }
71 DCHECK_NE(Z_VERSION_ERROR, ret);
72 return ret == Z_OK;
73 }
74
GetTypeAsString() const75 std::string GzipSourceStream::GetTypeAsString() const {
76 switch (type()) {
77 case TYPE_GZIP:
78 return kGzip;
79 case TYPE_DEFLATE:
80 return kDeflate;
81 default:
82 NOTREACHED();
83 return "";
84 }
85 }
86
FilterData(IOBuffer * output_buffer,size_t output_buffer_size,IOBuffer * input_buffer,size_t input_buffer_size,size_t * consumed_bytes,bool upstream_end_reached)87 base::expected<size_t, Error> GzipSourceStream::FilterData(
88 IOBuffer* output_buffer,
89 size_t output_buffer_size,
90 IOBuffer* input_buffer,
91 size_t input_buffer_size,
92 size_t* consumed_bytes,
93 bool upstream_end_reached) {
94 *consumed_bytes = 0;
95 char* input_data = input_buffer->data();
96 size_t input_data_size = input_buffer_size;
97 size_t bytes_out = 0;
98 bool state_compressed_entered = false;
99 while (input_data_size > 0 && bytes_out < output_buffer_size) {
100 InputState state = input_state_;
101 switch (state) {
102 case STATE_START: {
103 if (type() == TYPE_DEFLATE) {
104 input_state_ = STATE_SNIFFING_DEFLATE_HEADER;
105 break;
106 }
107 DCHECK_GT(input_data_size, 0u);
108 input_state_ = STATE_GZIP_HEADER;
109 break;
110 }
111 case STATE_GZIP_HEADER: {
112 DCHECK_NE(TYPE_DEFLATE, type());
113
114 const size_t kGzipFooterBytes = 8;
115 const char* end = nullptr;
116 GZipHeader::Status status =
117 gzip_header_.ReadMore(input_data, input_data_size, &end);
118 if (status == GZipHeader::INCOMPLETE_HEADER) {
119 input_data += input_data_size;
120 input_data_size = 0;
121 } else if (status == GZipHeader::COMPLETE_HEADER) {
122 // If there is a valid header, there should also be a valid footer.
123 gzip_footer_bytes_left_ = kGzipFooterBytes;
124 size_t bytes_consumed = static_cast<size_t>(end - input_data);
125 input_data += bytes_consumed;
126 input_data_size -= bytes_consumed;
127 input_state_ = STATE_COMPRESSED_BODY;
128 } else if (status == GZipHeader::INVALID_HEADER) {
129 return base::unexpected(ERR_CONTENT_DECODING_FAILED);
130 }
131 break;
132 }
133 case STATE_SNIFFING_DEFLATE_HEADER: {
134 DCHECK_EQ(TYPE_DEFLATE, type());
135
136 zlib_stream_.get()->next_in = base::bit_cast<Bytef*>(input_data);
137 zlib_stream_.get()->avail_in = input_data_size;
138 zlib_stream_.get()->next_out =
139 base::bit_cast<Bytef*>(output_buffer->data());
140 zlib_stream_.get()->avail_out = output_buffer_size;
141
142 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
143
144 // On error, try adding a zlib header and replaying the response. Note
145 // that data just received doesn't have to be replayed, since it hasn't
146 // been removed from input_data yet, only data from previous FilterData
147 // calls needs to be replayed.
148 if (ret != Z_STREAM_END && ret != Z_OK) {
149 if (!InsertZlibHeader())
150 return base::unexpected(ERR_CONTENT_DECODING_FAILED);
151
152 input_state_ = STATE_REPLAY_DATA;
153 // |replay_state_| should still have its initial value.
154 DCHECK_EQ(STATE_COMPRESSED_BODY, replay_state_);
155 break;
156 }
157
158 size_t bytes_used = input_data_size - zlib_stream_.get()->avail_in;
159 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
160 // If any bytes are output, enough total bytes have been received, or at
161 // the end of the stream, assume the response had a valid Zlib header.
162 if (bytes_out > 0 ||
163 bytes_used + replay_data_.size() >= kMaxZlibHeaderSniffBytes ||
164 ret == Z_STREAM_END) {
165 replay_data_.clear();
166 if (ret == Z_STREAM_END) {
167 input_state_ = STATE_GZIP_FOOTER;
168 } else {
169 input_state_ = STATE_COMPRESSED_BODY;
170 }
171 } else {
172 replay_data_.append(input_data, bytes_used);
173 }
174
175 input_data_size -= bytes_used;
176 input_data += bytes_used;
177 break;
178 }
179 case STATE_REPLAY_DATA: {
180 DCHECK_EQ(TYPE_DEFLATE, type());
181
182 if (replay_data_.empty()) {
183 input_state_ = replay_state_;
184 break;
185 }
186
187 // Call FilterData recursively, after updating |input_state_|, with
188 // |replay_data_|. This recursive call makes handling data from
189 // |replay_data_| and |input_buffer| much simpler than the alternative
190 // operations, though it's not pretty.
191 input_state_ = replay_state_;
192 size_t bytes_used;
193 scoped_refptr<IOBuffer> replay_buffer =
194 base::MakeRefCounted<WrappedIOBuffer>(replay_data_.data());
195 base::expected<size_t, Error> result =
196 FilterData(output_buffer, output_buffer_size, replay_buffer.get(),
197 replay_data_.size(), &bytes_used, upstream_end_reached);
198 replay_data_.erase(0, bytes_used);
199 // Back up resulting state, and return state to STATE_REPLAY_DATA.
200 replay_state_ = input_state_;
201 input_state_ = STATE_REPLAY_DATA;
202
203 // Could continue consuming data in the success case, but simplest not
204 // to.
205 if (!result.has_value() || result.value() != 0)
206 return result;
207 break;
208 }
209 case STATE_COMPRESSED_BODY: {
210 DCHECK(!state_compressed_entered);
211
212 state_compressed_entered = true;
213 zlib_stream_.get()->next_in = base::bit_cast<Bytef*>(input_data);
214 zlib_stream_.get()->avail_in = input_data_size;
215 zlib_stream_.get()->next_out =
216 base::bit_cast<Bytef*>(output_buffer->data());
217 zlib_stream_.get()->avail_out = output_buffer_size;
218
219 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
220 if (ret != Z_STREAM_END && ret != Z_OK)
221 return base::unexpected(ERR_CONTENT_DECODING_FAILED);
222
223 size_t bytes_used = input_data_size - zlib_stream_.get()->avail_in;
224 bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
225 input_data_size -= bytes_used;
226 input_data += bytes_used;
227 if (ret == Z_STREAM_END)
228 input_state_ = STATE_GZIP_FOOTER;
229 // zlib has written as much data to |output_buffer| as it could.
230 // There might still be some unconsumed data in |input_buffer| if there
231 // is no space in |output_buffer|.
232 break;
233 }
234 case STATE_GZIP_FOOTER: {
235 size_t to_read = std::min(gzip_footer_bytes_left_, input_data_size);
236 gzip_footer_bytes_left_ -= to_read;
237 input_data_size -= to_read;
238 input_data += to_read;
239 if (gzip_footer_bytes_left_ == 0)
240 input_state_ = STATE_IGNORING_EXTRA_BYTES;
241 break;
242 }
243 case STATE_IGNORING_EXTRA_BYTES: {
244 input_data_size = 0;
245 break;
246 }
247 }
248 }
249 *consumed_bytes = input_buffer_size - input_data_size;
250 return bytes_out;
251 }
252
InsertZlibHeader()253 bool GzipSourceStream::InsertZlibHeader() {
254 char dummy_header[] = {0x78, 0x01};
255 char dummy_output[4];
256
257 inflateReset(zlib_stream_.get());
258 zlib_stream_.get()->next_in = base::bit_cast<Bytef*>(&dummy_header[0]);
259 zlib_stream_.get()->avail_in = sizeof(dummy_header);
260 zlib_stream_.get()->next_out = base::bit_cast<Bytef*>(&dummy_output[0]);
261 zlib_stream_.get()->avail_out = sizeof(dummy_output);
262
263 int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
264 return ret == Z_OK;
265 }
266
267 } // namespace net
268