1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/filter/sdch_filter.h"
6
7 #include <ctype.h>
8 #include <limits.h>
9
10 #include <algorithm>
11
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15 #include "net/url_request/url_request_context.h"
16
17 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
18
19 namespace net {
20
21 namespace {
22
// Classifies why an SDCH response could not be decoded normally and instead
// led to a meta-refresh, a failure, or a fallback to pass-through. The values
// are recorded in the Sdch3.ResponseCorruptionDetection.* histograms.
enum ResponseCorruptionDetectionCause {
  RESPONSE_NONE = 0,

  RESPONSE_404 = 1,             // 404 HTTP response code.
  RESPONSE_NOT_200 = 2,         // HTTP response code other than 200.
  RESPONSE_OLD_UNENCODED = 3,   // Cached before the dictionary was retrieved.
  RESPONSE_TENTATIVE_SDCH = 4,  // Speculative but incorrect SDCH filtering
                                // was added.
  RESPONSE_NO_DICTIONARY = 5,   // The dictionary needed for decoding is
                                // missing.
  RESPONSE_CORRUPT_SDCH = 6,    // Should be an SDCH response, but is not.

  // No dictionary was advertised with the request and the server claims to
  // have encoded with SDCH anyway, yet the payload is not an SDCH response.
  RESPONSE_ENCODING_LIE = 7,

  // Upper bound passed to the enumeration histogram; keep it last.
  RESPONSE_MAX,
};
52
53 } // namespace
54
SdchFilter(const FilterContext & filter_context)55 SdchFilter::SdchFilter(const FilterContext& filter_context)
56 : filter_context_(filter_context),
57 decoding_status_(DECODING_UNINITIALIZED),
58 dictionary_hash_(),
59 dictionary_hash_is_plausible_(false),
60 dictionary_(NULL),
61 url_request_context_(filter_context.GetURLRequestContext()),
62 dest_buffer_excess_(),
63 dest_buffer_excess_index_(0),
64 source_bytes_(0),
65 output_bytes_(0),
66 possible_pass_through_(false) {
67 bool success = filter_context.GetMimeType(&mime_type_);
68 DCHECK(success);
69 success = filter_context.GetURL(&url_);
70 DCHECK(success);
71 DCHECK(url_request_context_->sdch_manager());
72 }
73
~SdchFilter()74 SdchFilter::~SdchFilter() {
75 // All code here is for gathering stats, and can be removed when SDCH is
76 // considered stable.
77
78 static int filter_use_count = 0;
79 ++filter_use_count;
80 if (META_REFRESH_RECOVERY == decoding_status_) {
81 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
82 }
83
84 if (vcdiff_streaming_decoder_.get()) {
85 if (!vcdiff_streaming_decoder_->FinishDecoding()) {
86 decoding_status_ = DECODING_ERROR;
87 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
88 // Make it possible for the user to hit reload, and get non-sdch content.
89 // Note this will "wear off" quickly enough, and is just meant to assure
90 // in some rare case that the user is not stuck.
91 url_request_context_->sdch_manager()->BlacklistDomain(
92 url_, SdchManager::INCOMPLETE_SDCH_CONTENT);
93 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
94 static_cast<int>(filter_context_.GetByteReadCount()));
95 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
96 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
97 }
98 }
99
100 if (!dest_buffer_excess_.empty()) {
101 // Filter chaining error, or premature teardown.
102 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
103 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
104 static_cast<int>(filter_context_.GetByteReadCount()));
105 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
106 dest_buffer_excess_.size());
107 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
108 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
109 }
110
111 if (filter_context_.IsCachedContent()) {
112 // Not a real error, but it is useful to have this tally.
113 // TODO(jar): Remove this stat after SDCH stability is validated.
114 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
115 return; // We don't need timing stats, and we aready got ratios.
116 }
117
118 switch (decoding_status_) {
119 case DECODING_IN_PROGRESS: {
120 if (output_bytes_)
121 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
122 static_cast<int>(
123 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
124 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
125 output_bytes_);
126 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
127
128 // Allow latency experiments to proceed.
129 url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
130 url_, true);
131 return;
132 }
133 case PASS_THROUGH: {
134 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
135 return;
136 }
137 case DECODING_UNINITIALIZED: {
138 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
139 return;
140 }
141 case WAITING_FOR_DICTIONARY_SELECTION: {
142 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
143 return;
144 }
145 case DECODING_ERROR: {
146 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
147 return;
148 }
149 case META_REFRESH_RECOVERY: {
150 // Already accounted for when set.
151 return;
152 }
153 } // end of switch.
154 }
155
InitDecoding(Filter::FilterType filter_type)156 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
157 if (decoding_status_ != DECODING_UNINITIALIZED)
158 return false;
159
160 // Handle case where sdch filter is guessed, but not required.
161 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
162 possible_pass_through_ = true;
163
164 // Initialize decoder only after we have a dictionary in hand.
165 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
166 return true;
167 }
168
// HTML that is output instead of the response body when the filter enters
// meta-refresh recovery. It always starts with a zero-delay meta-refresh tag;
// builds without NDEBUG additionally append a visible banner telling the user
// that the page is about to reload.
static const char* kDecompressionErrorHtml =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
#ifndef NDEBUG
    "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    "font-size:10pt;foreground-color:black;background-color:white\">"
    "An error occurred. This page will be reloaded shortly. "
    "Or press the \"reload\" button now to reload it immediately."
    "</div>"
#endif
    ;
182
ReadFilteredData(char * dest_buffer,int * dest_len)183 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
184 int* dest_len) {
185 int available_space = *dest_len;
186 *dest_len = 0; // Nothing output yet.
187
188 if (!dest_buffer || available_space <= 0)
189 return FILTER_ERROR;
190
191 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
192 FilterStatus status = InitializeDictionary();
193 if (FILTER_NEED_MORE_DATA == status)
194 return FILTER_NEED_MORE_DATA;
195 if (FILTER_ERROR == status) {
196 DCHECK_EQ(DECODING_ERROR, decoding_status_);
197 DCHECK_EQ(0u, dest_buffer_excess_index_);
198 DCHECK(dest_buffer_excess_.empty());
199 // This is where we try very hard to do error recovery, and make this
200 // protocol robust in the face of proxies that do many different things.
201 // If we decide that things are looking very bad (too hard to recover),
202 // we may even issue a "meta-refresh" to reload the page without an SDCH
203 // advertisement (so that we are sure we're not hurting anything).
204 //
205 // Watch out for an error page inserted by the proxy as part of a 40x
206 // error response. When we see such content molestation, we certainly
207 // need to fall into the meta-refresh case.
208 ResponseCorruptionDetectionCause cause = RESPONSE_NONE;
209 if (filter_context_.GetResponseCode() == 404) {
210 // We could be more generous, but for now, only a "NOT FOUND" code will
211 // cause a pass through. All other bad codes will fall into a
212 // meta-refresh.
213 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
214 cause = RESPONSE_404;
215 decoding_status_ = PASS_THROUGH;
216 } else if (filter_context_.GetResponseCode() != 200) {
217 // We need to meta-refresh, with SDCH disabled.
218 cause = RESPONSE_NOT_200;
219 } else if (filter_context_.IsCachedContent()
220 && !dictionary_hash_is_plausible_) {
221 // We must have hit the back button, and gotten content that was fetched
222 // before we *really* advertised SDCH and a dictionary.
223 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
224 decoding_status_ = PASS_THROUGH;
225 cause = RESPONSE_OLD_UNENCODED;
226 } else if (possible_pass_through_) {
227 // This is the potentially most graceful response. There really was no
228 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
229 // We added the sdch coding tag, and it should not have been added.
230 // This can happen in server experiments, where the server decides
231 // not to use sdch, even though there is a dictionary. To be
232 // conservative, we locally added the tentative sdch (fearing that a
233 // proxy stripped it!) and we must now recant (pass through).
234 //
235 // However.... just to be sure we don't get burned by proxies that
236 // re-compress with gzip or other system, we can sniff to see if this
237 // is compressed data etc. For now, we do nothing, which gets us into
238 // the meta-refresh result.
239 // TODO(jar): Improve robustness by sniffing for valid text that we can
240 // actual use re: decoding_status_ = PASS_THROUGH;
241 cause = RESPONSE_TENTATIVE_SDCH;
242 } else if (dictionary_hash_is_plausible_) {
243 // We need a meta-refresh since we don't have the dictionary.
244 // The common cause is a restart of the browser, where we try to render
245 // cached content that was saved when we had a dictionary.
246 cause = RESPONSE_NO_DICTIONARY;
247 } else if (filter_context_.SdchResponseExpected()) {
248 // This is a very corrupt SDCH request response. We can't decode it.
249 // We'll use a meta-refresh, and get content without asking for SDCH.
250 // This will also progressively disable SDCH for this domain.
251 cause = RESPONSE_CORRUPT_SDCH;
252 } else {
253 // One of the first 9 bytes precluded consideration as a hash.
254 // This can't be an SDCH payload, even though the server said it was.
255 // This is a major error, as the server or proxy tagged this SDCH even
256 // though it is not!
257 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
258 // Worse yet, meta-refresh could lead to an infinite refresh loop.
259 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
260 decoding_status_ = PASS_THROUGH;
261 // ... but further back-off on advertising SDCH support.
262 url_request_context_->sdch_manager()->BlacklistDomain(
263 url_, SdchManager::PASSING_THROUGH_NON_SDCH);
264 cause = RESPONSE_ENCODING_LIE;
265 }
266 DCHECK_NE(RESPONSE_NONE, cause);
267
268 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
269 // caches the histogram name based on the call site.
270 if (filter_context_.IsCachedContent()) {
271 UMA_HISTOGRAM_ENUMERATION(
272 "Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX);
273 } else {
274 UMA_HISTOGRAM_ENUMERATION(
275 "Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX);
276 }
277
278 if (decoding_status_ == PASS_THROUGH) {
279 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned.
280 } else {
281 // This is where we try to do the expensive meta-refresh.
282 if (std::string::npos == mime_type_.find("text/html")) {
283 // Since we can't do a meta-refresh (along with an exponential
284 // backoff), we'll just make sure this NEVER happens again.
285 SdchManager::ProblemCodes problem =
286 (filter_context_.IsCachedContent() ?
287 SdchManager::CACHED_META_REFRESH_UNSUPPORTED :
288 SdchManager::META_REFRESH_UNSUPPORTED);
289 url_request_context_->sdch_manager()->BlacklistDomainForever(
290 url_, problem);
291 SdchManager::SdchErrorRecovery(problem);
292 return FILTER_ERROR;
293 }
294 // HTML content means we can issue a meta-refresh, and get the content
295 // again, perhaps without SDCH (to be safe).
296 if (filter_context_.IsCachedContent()) {
297 // Cached content is probably a startup tab, so we'll just get fresh
298 // content and try again, without disabling sdch.
299 SdchManager::SdchErrorRecovery(
300 SdchManager::META_REFRESH_CACHED_RECOVERY);
301 } else {
302 // Since it wasn't in the cache, we definately need at least some
303 // period of blacklisting to get the correct content.
304 url_request_context_->sdch_manager()->BlacklistDomain(
305 url_, SdchManager::META_REFRESH_RECOVERY);
306 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
307 }
308 decoding_status_ = META_REFRESH_RECOVERY;
309 // Issue a meta redirect with SDCH disabled.
310 dest_buffer_excess_ = kDecompressionErrorHtml;
311 }
312 } else {
313 DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
314 }
315 }
316
317 int amount = OutputBufferExcess(dest_buffer, available_space);
318 *dest_len += amount;
319 dest_buffer += amount;
320 available_space -= amount;
321 DCHECK_GE(available_space, 0);
322
323 if (available_space <= 0)
324 return FILTER_OK;
325 DCHECK(dest_buffer_excess_.empty());
326 DCHECK_EQ(0u, dest_buffer_excess_index_);
327
328 if (decoding_status_ != DECODING_IN_PROGRESS) {
329 if (META_REFRESH_RECOVERY == decoding_status_) {
330 // Absorb all input data. We've already output page reload HTML.
331 next_stream_data_ = NULL;
332 stream_data_len_ = 0;
333 return FILTER_NEED_MORE_DATA;
334 }
335 if (PASS_THROUGH == decoding_status_) {
336 // We must pass in available_space, but it will be changed to bytes_used.
337 FilterStatus result = CopyOut(dest_buffer, &available_space);
338 // Accumulate the returned count of bytes_used (a.k.a., available_space).
339 *dest_len += available_space;
340 return result;
341 }
342 DCHECK(false);
343 decoding_status_ = DECODING_ERROR;
344 return FILTER_ERROR;
345 }
346
347 if (!next_stream_data_ || stream_data_len_ <= 0)
348 return FILTER_NEED_MORE_DATA;
349
350 bool ret = vcdiff_streaming_decoder_->DecodeChunk(
351 next_stream_data_, stream_data_len_, &dest_buffer_excess_);
352 // Assume all data was used in decoding.
353 next_stream_data_ = NULL;
354 source_bytes_ += stream_data_len_;
355 stream_data_len_ = 0;
356 output_bytes_ += dest_buffer_excess_.size();
357 if (!ret) {
358 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again.
359 decoding_status_ = DECODING_ERROR;
360 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
361 return FILTER_ERROR;
362 }
363
364 amount = OutputBufferExcess(dest_buffer, available_space);
365 *dest_len += amount;
366 dest_buffer += amount;
367 available_space -= amount;
368 if (0 == available_space && !dest_buffer_excess_.empty())
369 return FILTER_OK;
370 return FILTER_NEED_MORE_DATA;
371 }
372
InitializeDictionary()373 Filter::FilterStatus SdchFilter::InitializeDictionary() {
374 const size_t kServerIdLength = 9; // Dictionary hash plus null from server.
375 size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
376 DCHECK_GT(bytes_needed, 0u);
377 if (!next_stream_data_)
378 return FILTER_NEED_MORE_DATA;
379 if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
380 dictionary_hash_.append(next_stream_data_, stream_data_len_);
381 next_stream_data_ = NULL;
382 stream_data_len_ = 0;
383 return FILTER_NEED_MORE_DATA;
384 }
385 dictionary_hash_.append(next_stream_data_, bytes_needed);
386 DCHECK(kServerIdLength == dictionary_hash_.size());
387 stream_data_len_ -= bytes_needed;
388 DCHECK_LE(0, stream_data_len_);
389 if (stream_data_len_ > 0)
390 next_stream_data_ += bytes_needed;
391 else
392 next_stream_data_ = NULL;
393
394 DCHECK(!dictionary_.get());
395 dictionary_hash_is_plausible_ = true; // Assume plausible, but check.
396
397 if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
398 SdchManager* manager(url_request_context_->sdch_manager());
399 manager->GetVcdiffDictionary(
400 std::string(dictionary_hash_, 0, kServerIdLength - 1),
401 url_, &dictionary_);
402 } else {
403 dictionary_hash_is_plausible_ = false;
404 }
405
406 if (!dictionary_.get()) {
407 DCHECK(dictionary_hash_.size() == kServerIdLength);
408 // Since dictionary was not found, check to see if hash was even plausible.
409 for (size_t i = 0; i < kServerIdLength - 1; ++i) {
410 char base64_char = dictionary_hash_[i];
411 if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
412 dictionary_hash_is_plausible_ = false;
413 break;
414 }
415 }
416 if (dictionary_hash_is_plausible_)
417 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
418 else
419 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
420 decoding_status_ = DECODING_ERROR;
421 return FILTER_ERROR;
422 }
423 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
424 vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
425 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
426 dictionary_->text().size());
427 decoding_status_ = DECODING_IN_PROGRESS;
428 return FILTER_OK;
429 }
430
OutputBufferExcess(char * const dest_buffer,size_t available_space)431 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
432 size_t available_space) {
433 if (dest_buffer_excess_.empty())
434 return 0;
435 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
436 size_t amount = std::min(available_space,
437 dest_buffer_excess_.size() - dest_buffer_excess_index_);
438 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
439 amount);
440 dest_buffer_excess_index_ += amount;
441 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
442 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
443 dest_buffer_excess_.clear();
444 dest_buffer_excess_index_ = 0;
445 }
446 return amount;
447 }
448
449 } // namespace net
450