1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/sdch_filter.h"
6
7 #include <limits.h>
8 #include <ctype.h>
9 #include <algorithm>
10
11 #include "base/file_util.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15
16 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
17
18 namespace net {
19
SdchFilter(const FilterContext & filter_context)20 SdchFilter::SdchFilter(const FilterContext& filter_context)
21 : filter_context_(filter_context),
22 decoding_status_(DECODING_UNINITIALIZED),
23 vcdiff_streaming_decoder_(NULL),
24 dictionary_hash_(),
25 dictionary_hash_is_plausible_(false),
26 dictionary_(NULL),
27 dest_buffer_excess_(),
28 dest_buffer_excess_index_(0),
29 source_bytes_(0),
30 output_bytes_(0),
31 possible_pass_through_(false) {
32 bool success = filter_context.GetMimeType(&mime_type_);
33 DCHECK(success);
34 success = filter_context.GetURL(&url_);
35 DCHECK(success);
36 }
37
~SdchFilter()38 SdchFilter::~SdchFilter() {
39 // All code here is for gathering stats, and can be removed when SDCH is
40 // considered stable.
41
42 static int filter_use_count = 0;
43 ++filter_use_count;
44 if (META_REFRESH_RECOVERY == decoding_status_) {
45 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
46 }
47
48 if (vcdiff_streaming_decoder_.get()) {
49 if (!vcdiff_streaming_decoder_->FinishDecoding()) {
50 decoding_status_ = DECODING_ERROR;
51 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
52 // Make it possible for the user to hit reload, and get non-sdch content.
53 // Note this will "wear off" quickly enough, and is just meant to assure
54 // in some rare case that the user is not stuck.
55 SdchManager::BlacklistDomain(url_);
56 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
57 static_cast<int>(filter_context_.GetByteReadCount()));
58 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
59 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
60 }
61 }
62
63 if (!dest_buffer_excess_.empty()) {
64 // Filter chaining error, or premature teardown.
65 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
66 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
67 static_cast<int>(filter_context_.GetByteReadCount()));
68 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
69 dest_buffer_excess_.size());
70 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
71 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
72 }
73
74 if (filter_context_.IsCachedContent()) {
75 // Not a real error, but it is useful to have this tally.
76 // TODO(jar): Remove this stat after SDCH stability is validated.
77 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
78 return; // We don't need timing stats, and we aready got ratios.
79 }
80
81 switch (decoding_status_) {
82 case DECODING_IN_PROGRESS: {
83 if (output_bytes_)
84 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
85 static_cast<int>(
86 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
87 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
88 output_bytes_);
89 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
90
91 // Allow latency experiments to proceed.
92 SdchManager::Global()->SetAllowLatencyExperiment(url_, true);
93 return;
94 }
95 case PASS_THROUGH: {
96 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
97 return;
98 }
99 case DECODING_UNINITIALIZED: {
100 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
101 return;
102 }
103 case WAITING_FOR_DICTIONARY_SELECTION: {
104 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
105 return;
106 }
107 case DECODING_ERROR: {
108 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
109 return;
110 }
111 case META_REFRESH_RECOVERY: {
112 // Already accounted for when set.
113 return;
114 }
115 } // end of switch.
116 }
117
InitDecoding(Filter::FilterType filter_type)118 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
119 if (decoding_status_ != DECODING_UNINITIALIZED)
120 return false;
121
122 // Handle case where sdch filter is guessed, but not required.
123 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
124 possible_pass_through_ = true;
125
126 // Initialize decoder only after we have a dictionary in hand.
127 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
128 return true;
129 }
130
// HTML emitted in place of content that could not be SDCH-decoded. It always
// starts with an immediate meta-refresh so the page reloads; builds without
// NDEBUG additionally append a visible banner explaining the reload.
// Declared as a const array (rather than a reassignable pointer) with the
// shared prefix written once, so both build flavors cannot drift apart.
static const char kDecompressionErrorHtml[] =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
#ifndef NDEBUG
    "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    "font-size:10pt;foreground-color:black;background-color:white\">"
    "An error occurred. This page will be reloaded shortly. "
    "Or press the \"reload\" button now to reload it immediately."
    "</div>"
#endif
    ;
144
ReadFilteredData(char * dest_buffer,int * dest_len)145 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
146 int* dest_len) {
147 int available_space = *dest_len;
148 *dest_len = 0; // Nothing output yet.
149
150 if (!dest_buffer || available_space <= 0)
151 return FILTER_ERROR;
152
153 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
154 FilterStatus status = InitializeDictionary();
155 if (FILTER_NEED_MORE_DATA == status)
156 return FILTER_NEED_MORE_DATA;
157 if (FILTER_ERROR == status) {
158 DCHECK(DECODING_ERROR == decoding_status_);
159 DCHECK_EQ(0u, dest_buffer_excess_index_);
160 DCHECK(dest_buffer_excess_.empty());
161 // This is where we try very hard to do error recovery, and make this
162 // protocol robust in the face of proxies that do many different things.
163 // If we decide that things are looking very bad (too hard to recover),
164 // we may even issue a "meta-refresh" to reload the page without an SDCH
165 // advertisement (so that we are sure we're not hurting anything).
166 //
167 // Watch out for an error page inserted by the proxy as part of a 40x
168 // error response. When we see such content molestation, we certainly
169 // need to fall into the meta-refresh case.
170 if (filter_context_.GetResponseCode() == 404) {
171 // We could be more generous, but for now, only a "NOT FOUND" code will
172 // cause a pass through. All other bad codes will fall into a
173 // meta-refresh.
174 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
175 decoding_status_ = PASS_THROUGH;
176 } else if (filter_context_.GetResponseCode() != 200) {
177 // We need to meta-refresh, with SDCH disabled.
178 } else if (filter_context_.IsCachedContent()
179 && !dictionary_hash_is_plausible_) {
180 // We must have hit the back button, and gotten content that was fetched
181 // before we *really* advertised SDCH and a dictionary.
182 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
183 decoding_status_ = PASS_THROUGH;
184 } else if (possible_pass_through_) {
185 // This is the potentially most graceful response. There really was no
186 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
187 // We added the sdch coding tag, and it should not have been added.
188 // This can happen in server experiments, where the server decides
189 // not to use sdch, even though there is a dictionary. To be
190 // conservative, we locally added the tentative sdch (fearing that a
191 // proxy stripped it!) and we must now recant (pass through).
192 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
193 // However.... just to be sure we don't get burned by proxies that
194 // re-compress with gzip or other system, we can sniff to see if this
195 // is compressed data etc. For now, we do nothing, which gets us into
196 // the meta-refresh result.
197 // TODO(jar): Improve robustness by sniffing for valid text that we can
198 // actual use re: decoding_status_ = PASS_THROUGH;
199 } else if (dictionary_hash_is_plausible_) {
200 // We need a meta-refresh since we don't have the dictionary.
201 // The common cause is a restart of the browser, where we try to render
202 // cached content that was saved when we had a dictionary.
203 } else if (filter_context_.IsSdchResponse()) {
204 // This is a very corrupt SDCH request response. We can't decode it.
205 // We'll use a meta-refresh, and get content without asking for SDCH.
206 // This will also progressively disable SDCH for this domain.
207 } else {
208 // One of the first 9 bytes precluded consideration as a hash.
209 // This can't be an SDCH payload, even though the server said it was.
210 // This is a major error, as the server or proxy tagged this SDCH even
211 // though it is not!
212 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
213 // Worse yet, meta-refresh could lead to an infinite refresh loop.
214 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
215 decoding_status_ = PASS_THROUGH;
216 // ... but further back-off on advertising SDCH support.
217 SdchManager::BlacklistDomain(url_);
218 }
219
220 if (decoding_status_ == PASS_THROUGH) {
221 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned.
222 } else {
223 // This is where we try to do the expensive meta-refresh.
224 if (std::string::npos == mime_type_.find("text/html")) {
225 // Since we can't do a meta-refresh (along with an exponential
226 // backoff), we'll just make sure this NEVER happens again.
227 SdchManager::BlacklistDomainForever(url_);
228 if (filter_context_.IsCachedContent())
229 SdchManager::SdchErrorRecovery(
230 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
231 else
232 SdchManager::SdchErrorRecovery(
233 SdchManager::META_REFRESH_UNSUPPORTED);
234 return FILTER_ERROR;
235 }
236 // HTML content means we can issue a meta-refresh, and get the content
237 // again, perhaps without SDCH (to be safe).
238 if (filter_context_.IsCachedContent()) {
239 // Cached content is probably a startup tab, so we'll just get fresh
240 // content and try again, without disabling sdch.
241 SdchManager::SdchErrorRecovery(
242 SdchManager::META_REFRESH_CACHED_RECOVERY);
243 } else {
244 // Since it wasn't in the cache, we definately need at least some
245 // period of blacklisting to get the correct content.
246 SdchManager::BlacklistDomain(url_);
247 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
248 }
249 decoding_status_ = META_REFRESH_RECOVERY;
250 // Issue a meta redirect with SDCH disabled.
251 dest_buffer_excess_ = kDecompressionErrorHtml;
252 }
253 } else {
254 DCHECK(DECODING_IN_PROGRESS == decoding_status_);
255 }
256 }
257
258 int amount = OutputBufferExcess(dest_buffer, available_space);
259 *dest_len += amount;
260 dest_buffer += amount;
261 available_space -= amount;
262 DCHECK_GE(available_space, 0);
263
264 if (available_space <= 0)
265 return FILTER_OK;
266 DCHECK(dest_buffer_excess_.empty());
267 DCHECK_EQ(0u, dest_buffer_excess_index_);
268
269 if (decoding_status_ != DECODING_IN_PROGRESS) {
270 if (META_REFRESH_RECOVERY == decoding_status_) {
271 // Absorb all input data. We've already output page reload HTML.
272 next_stream_data_ = NULL;
273 stream_data_len_ = 0;
274 return FILTER_NEED_MORE_DATA;
275 }
276 if (PASS_THROUGH == decoding_status_) {
277 // We must pass in available_space, but it will be changed to bytes_used.
278 FilterStatus result = CopyOut(dest_buffer, &available_space);
279 // Accumulate the returned count of bytes_used (a.k.a., available_space).
280 *dest_len += available_space;
281 return result;
282 }
283 DCHECK(false);
284 decoding_status_ = DECODING_ERROR;
285 return FILTER_ERROR;
286 }
287
288 if (!next_stream_data_ || stream_data_len_ <= 0)
289 return FILTER_NEED_MORE_DATA;
290
291 bool ret = vcdiff_streaming_decoder_->DecodeChunk(
292 next_stream_data_, stream_data_len_, &dest_buffer_excess_);
293 // Assume all data was used in decoding.
294 next_stream_data_ = NULL;
295 source_bytes_ += stream_data_len_;
296 stream_data_len_ = 0;
297 output_bytes_ += dest_buffer_excess_.size();
298 if (!ret) {
299 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again.
300 decoding_status_ = DECODING_ERROR;
301 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
302 return FILTER_ERROR;
303 }
304
305 amount = OutputBufferExcess(dest_buffer, available_space);
306 *dest_len += amount;
307 dest_buffer += amount;
308 available_space -= amount;
309 if (0 == available_space && !dest_buffer_excess_.empty())
310 return FILTER_OK;
311 return FILTER_NEED_MORE_DATA;
312 }
313
InitializeDictionary()314 Filter::FilterStatus SdchFilter::InitializeDictionary() {
315 const size_t kServerIdLength = 9; // Dictionary hash plus null from server.
316 size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
317 DCHECK_GT(bytes_needed, 0u);
318 if (!next_stream_data_)
319 return FILTER_NEED_MORE_DATA;
320 if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
321 dictionary_hash_.append(next_stream_data_, stream_data_len_);
322 next_stream_data_ = NULL;
323 stream_data_len_ = 0;
324 return FILTER_NEED_MORE_DATA;
325 }
326 dictionary_hash_.append(next_stream_data_, bytes_needed);
327 DCHECK(kServerIdLength == dictionary_hash_.size());
328 stream_data_len_ -= bytes_needed;
329 DCHECK_LE(0, stream_data_len_);
330 if (stream_data_len_ > 0)
331 next_stream_data_ += bytes_needed;
332 else
333 next_stream_data_ = NULL;
334
335 DCHECK(!dictionary_.get());
336 dictionary_hash_is_plausible_ = true; // Assume plausible, but check.
337
338 SdchManager::Dictionary* dictionary = NULL;
339 if ('\0' == dictionary_hash_[kServerIdLength - 1])
340 SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0,
341 kServerIdLength - 1),
342 url_, &dictionary);
343 else
344 dictionary_hash_is_plausible_ = false;
345
346 if (!dictionary) {
347 DCHECK(dictionary_hash_.size() == kServerIdLength);
348 // Since dictionary was not found, check to see if hash was even plausible.
349 for (size_t i = 0; i < kServerIdLength - 1; ++i) {
350 char base64_char = dictionary_hash_[i];
351 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
352 dictionary_hash_is_plausible_ = false;
353 break;
354 }
355 }
356 if (dictionary_hash_is_plausible_)
357 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
358 else
359 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
360 decoding_status_ = DECODING_ERROR;
361 return FILTER_ERROR;
362 }
363 dictionary_ = dictionary;
364 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
365 vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
366 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
367 dictionary_->text().size());
368 decoding_status_ = DECODING_IN_PROGRESS;
369 return FILTER_OK;
370 }
371
OutputBufferExcess(char * const dest_buffer,size_t available_space)372 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
373 size_t available_space) {
374 if (dest_buffer_excess_.empty())
375 return 0;
376 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
377 size_t amount = std::min(available_space,
378 dest_buffer_excess_.size() - dest_buffer_excess_index_);
379 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
380 amount);
381 dest_buffer_excess_index_ += amount;
382 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
383 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
384 dest_buffer_excess_.clear();
385 dest_buffer_excess_index_ = 0;
386 }
387 return amount;
388 }
389
390 } // namespace net
391