• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/sdch_filter.h"
6 
7 #include <limits.h>
8 #include <ctype.h>
9 #include <algorithm>
10 
11 #include "base/file_util.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15 
16 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
17 
18 namespace net {
19 
SdchFilter(const FilterContext & filter_context)20 SdchFilter::SdchFilter(const FilterContext& filter_context)
21     : filter_context_(filter_context),
22       decoding_status_(DECODING_UNINITIALIZED),
23       vcdiff_streaming_decoder_(NULL),
24       dictionary_hash_(),
25       dictionary_hash_is_plausible_(false),
26       dictionary_(NULL),
27       dest_buffer_excess_(),
28       dest_buffer_excess_index_(0),
29       source_bytes_(0),
30       output_bytes_(0),
31       possible_pass_through_(false) {
32   bool success = filter_context.GetMimeType(&mime_type_);
33   DCHECK(success);
34   success = filter_context.GetURL(&url_);
35   DCHECK(success);
36 }
37 
~SdchFilter()38 SdchFilter::~SdchFilter() {
39   // All code here is for gathering stats, and can be removed when SDCH is
40   // considered stable.
41 
42   static int filter_use_count = 0;
43   ++filter_use_count;
44   if (META_REFRESH_RECOVERY == decoding_status_) {
45     UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
46   }
47 
48   if (vcdiff_streaming_decoder_.get()) {
49     if (!vcdiff_streaming_decoder_->FinishDecoding()) {
50       decoding_status_ = DECODING_ERROR;
51       SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
52       // Make it possible for the user to hit reload, and get non-sdch content.
53       // Note this will "wear off" quickly enough, and is just meant to assure
54       // in some rare case that the user is not stuck.
55       SdchManager::BlacklistDomain(url_);
56       UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
57            static_cast<int>(filter_context_.GetByteReadCount()));
58       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
59       UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
60     }
61   }
62 
63   if (!dest_buffer_excess_.empty()) {
64     // Filter chaining error, or premature teardown.
65     SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
66     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
67          static_cast<int>(filter_context_.GetByteReadCount()));
68     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
69                          dest_buffer_excess_.size());
70     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
71     UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
72   }
73 
74   if (filter_context_.IsCachedContent()) {
75     // Not a real error, but it is useful to have this tally.
76     // TODO(jar): Remove this stat after SDCH stability is validated.
77     SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
78     return;  // We don't need timing stats, and we aready got ratios.
79   }
80 
81   switch (decoding_status_) {
82     case DECODING_IN_PROGRESS: {
83       if (output_bytes_)
84         UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
85             static_cast<int>(
86                 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
87       UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
88                            output_bytes_);
89       filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
90 
91       // Allow latency experiments to proceed.
92       SdchManager::Global()->SetAllowLatencyExperiment(url_, true);
93       return;
94     }
95     case PASS_THROUGH: {
96       filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
97       return;
98     }
99     case DECODING_UNINITIALIZED: {
100       SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
101       return;
102     }
103     case WAITING_FOR_DICTIONARY_SELECTION: {
104       SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
105       return;
106     }
107     case DECODING_ERROR: {
108       SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
109       return;
110     }
111     case META_REFRESH_RECOVERY: {
112       // Already accounted for when set.
113       return;
114     }
115   }  // end of switch.
116 }
117 
InitDecoding(Filter::FilterType filter_type)118 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
119   if (decoding_status_ != DECODING_UNINITIALIZED)
120     return false;
121 
122   // Handle case  where sdch filter is guessed, but not required.
123   if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
124     possible_pass_through_ = true;
125 
126   // Initialize decoder only after we have a dictionary in hand.
127   decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
128   return true;
129 }
130 
// HTML emitted in place of undecodable SDCH content.  The meta-refresh tag
// makes the page reload itself; builds without NDEBUG additionally show a
// visible banner explaining the reload.
//
// Declared as a const array rather than a `const char*` so the constant cannot
// be reseated at runtime and sizeof() yields the string length (plus NUL).
#ifndef NDEBUG
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
  "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
  "border-color:black;border-style:solid;text-align:left;font-family:arial;"
  "font-size:10pt;foreground-color:black;background-color:white\">"
  "An error occurred. This page will be reloaded shortly. "
  "Or press the \"reload\" button now to reload it immediately."
  "</div>";
#else
static const char kDecompressionErrorHtml[] =
  "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
144 
ReadFilteredData(char * dest_buffer,int * dest_len)145 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
146                                                   int* dest_len) {
147   int available_space = *dest_len;
148   *dest_len = 0;  // Nothing output yet.
149 
150   if (!dest_buffer || available_space <= 0)
151     return FILTER_ERROR;
152 
153   if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
154     FilterStatus status = InitializeDictionary();
155     if (FILTER_NEED_MORE_DATA == status)
156       return FILTER_NEED_MORE_DATA;
157     if (FILTER_ERROR == status) {
158       DCHECK(DECODING_ERROR == decoding_status_);
159       DCHECK_EQ(0u, dest_buffer_excess_index_);
160       DCHECK(dest_buffer_excess_.empty());
161       // This is where we try very hard to do error recovery, and make this
162       // protocol robust in the face of proxies that do many different things.
163       // If we decide that things are looking very bad (too hard to recover),
164       // we may even issue a "meta-refresh" to reload the page without an SDCH
165       // advertisement (so that we are sure we're not hurting anything).
166       //
167       // Watch out for an error page inserted by the proxy as part of a 40x
168       // error response.  When we see such content molestation, we certainly
169       // need to fall into the meta-refresh case.
170       if (filter_context_.GetResponseCode() == 404) {
171         // We could be more generous, but for now, only a "NOT FOUND" code will
172         // cause a pass through.  All other bad codes will fall into a
173         // meta-refresh.
174         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
175         decoding_status_ = PASS_THROUGH;
176       } else if (filter_context_.GetResponseCode() != 200) {
177         // We need to meta-refresh, with SDCH disabled.
178       } else if (filter_context_.IsCachedContent()
179                  && !dictionary_hash_is_plausible_) {
180         // We must have hit the back button, and gotten content that was fetched
181         // before we *really* advertised SDCH and a dictionary.
182         SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
183         decoding_status_ = PASS_THROUGH;
184       } else if (possible_pass_through_) {
185         // This is the potentially most graceful response. There really was no
186         // error. We were just overly cautious when we added a TENTATIVE_SDCH.
187         // We added the sdch coding tag, and it should not have been added.
188         // This can happen in server experiments, where the server decides
189         // not to use sdch, even though there is a dictionary.  To be
190         // conservative, we locally added the tentative sdch (fearing that a
191         // proxy stripped it!) and we must now recant (pass through).
192         SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
193         // However.... just to be sure we don't get burned by proxies that
194         // re-compress with gzip or other system, we can sniff to see if this
195         // is compressed data etc.  For now, we do nothing, which gets us into
196         // the meta-refresh result.
197         // TODO(jar): Improve robustness by sniffing for valid text that we can
198         // actual use re: decoding_status_ = PASS_THROUGH;
199       } else if (dictionary_hash_is_plausible_) {
200         // We need a meta-refresh since we don't have the dictionary.
201         // The common cause is a restart of the browser, where we try to render
202         // cached content that was saved when we had a dictionary.
203       } else if (filter_context_.IsSdchResponse()) {
204         // This is a very corrupt SDCH request response.  We can't decode it.
205         // We'll use a meta-refresh, and get content without asking for SDCH.
206         // This will also progressively disable SDCH for this domain.
207       } else {
208         // One of the first 9 bytes precluded consideration as a hash.
209         // This can't be an SDCH payload, even though the server said it was.
210         // This is a major error, as the server or proxy tagged this SDCH even
211         // though it is not!
212         // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
213         // Worse yet, meta-refresh could lead to an infinite refresh loop.
214         SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
215         decoding_status_ = PASS_THROUGH;
216         // ... but further back-off on advertising SDCH support.
217         SdchManager::BlacklistDomain(url_);
218       }
219 
220       if (decoding_status_ == PASS_THROUGH) {
221         dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
222       } else {
223         // This is where we try to do the expensive meta-refresh.
224         if (std::string::npos == mime_type_.find("text/html")) {
225           // Since we can't do a meta-refresh (along with an exponential
226           // backoff), we'll just make sure this NEVER happens again.
227           SdchManager::BlacklistDomainForever(url_);
228           if (filter_context_.IsCachedContent())
229             SdchManager::SdchErrorRecovery(
230                 SdchManager::CACHED_META_REFRESH_UNSUPPORTED);
231           else
232             SdchManager::SdchErrorRecovery(
233                 SdchManager::META_REFRESH_UNSUPPORTED);
234           return FILTER_ERROR;
235         }
236         // HTML content means we can issue a meta-refresh, and get the content
237         // again, perhaps without SDCH (to be safe).
238         if (filter_context_.IsCachedContent()) {
239           // Cached content is probably a startup tab, so we'll just get fresh
240           // content and try again, without disabling sdch.
241           SdchManager::SdchErrorRecovery(
242               SdchManager::META_REFRESH_CACHED_RECOVERY);
243         } else {
244           // Since it wasn't in the cache, we definately need at least some
245           // period of blacklisting to get the correct content.
246           SdchManager::BlacklistDomain(url_);
247           SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
248         }
249         decoding_status_ = META_REFRESH_RECOVERY;
250         // Issue a meta redirect with SDCH disabled.
251         dest_buffer_excess_ = kDecompressionErrorHtml;
252       }
253     } else {
254       DCHECK(DECODING_IN_PROGRESS == decoding_status_);
255     }
256   }
257 
258   int amount = OutputBufferExcess(dest_buffer, available_space);
259   *dest_len += amount;
260   dest_buffer += amount;
261   available_space -= amount;
262   DCHECK_GE(available_space, 0);
263 
264   if (available_space <= 0)
265     return FILTER_OK;
266   DCHECK(dest_buffer_excess_.empty());
267   DCHECK_EQ(0u, dest_buffer_excess_index_);
268 
269   if (decoding_status_ != DECODING_IN_PROGRESS) {
270     if (META_REFRESH_RECOVERY == decoding_status_) {
271       // Absorb all input data.  We've already output page reload HTML.
272       next_stream_data_ = NULL;
273       stream_data_len_ = 0;
274       return FILTER_NEED_MORE_DATA;
275     }
276     if (PASS_THROUGH == decoding_status_) {
277       // We must pass in available_space, but it will be changed to bytes_used.
278       FilterStatus result = CopyOut(dest_buffer, &available_space);
279       // Accumulate the returned count of bytes_used (a.k.a., available_space).
280       *dest_len += available_space;
281       return result;
282     }
283     DCHECK(false);
284     decoding_status_ = DECODING_ERROR;
285     return FILTER_ERROR;
286   }
287 
288   if (!next_stream_data_ || stream_data_len_ <= 0)
289     return FILTER_NEED_MORE_DATA;
290 
291   bool ret = vcdiff_streaming_decoder_->DecodeChunk(
292     next_stream_data_, stream_data_len_, &dest_buffer_excess_);
293   // Assume all data was used in decoding.
294   next_stream_data_ = NULL;
295   source_bytes_ += stream_data_len_;
296   stream_data_len_ = 0;
297   output_bytes_ += dest_buffer_excess_.size();
298   if (!ret) {
299     vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
300     decoding_status_ = DECODING_ERROR;
301     SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
302     return FILTER_ERROR;
303   }
304 
305   amount = OutputBufferExcess(dest_buffer, available_space);
306   *dest_len += amount;
307   dest_buffer += amount;
308   available_space -= amount;
309   if (0 == available_space && !dest_buffer_excess_.empty())
310       return FILTER_OK;
311   return FILTER_NEED_MORE_DATA;
312 }
313 
InitializeDictionary()314 Filter::FilterStatus SdchFilter::InitializeDictionary() {
315   const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
316   size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
317   DCHECK_GT(bytes_needed, 0u);
318   if (!next_stream_data_)
319     return FILTER_NEED_MORE_DATA;
320   if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
321     dictionary_hash_.append(next_stream_data_, stream_data_len_);
322     next_stream_data_ = NULL;
323     stream_data_len_ = 0;
324     return FILTER_NEED_MORE_DATA;
325   }
326   dictionary_hash_.append(next_stream_data_, bytes_needed);
327   DCHECK(kServerIdLength == dictionary_hash_.size());
328   stream_data_len_ -= bytes_needed;
329   DCHECK_LE(0, stream_data_len_);
330   if (stream_data_len_ > 0)
331     next_stream_data_ += bytes_needed;
332   else
333     next_stream_data_ = NULL;
334 
335   DCHECK(!dictionary_.get());
336   dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.
337 
338   SdchManager::Dictionary* dictionary = NULL;
339   if ('\0' == dictionary_hash_[kServerIdLength - 1])
340     SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_, 0,
341                                                            kServerIdLength - 1),
342                                                url_, &dictionary);
343   else
344     dictionary_hash_is_plausible_ = false;
345 
346   if (!dictionary) {
347     DCHECK(dictionary_hash_.size() == kServerIdLength);
348     // Since dictionary was not found, check to see if hash was even plausible.
349     for (size_t i = 0; i < kServerIdLength - 1; ++i) {
350       char base64_char = dictionary_hash_[i];
351       if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
352         dictionary_hash_is_plausible_ = false;
353         break;
354       }
355     }
356     if (dictionary_hash_is_plausible_)
357       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
358     else
359       SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
360     decoding_status_ = DECODING_ERROR;
361     return FILTER_ERROR;
362   }
363   dictionary_ = dictionary;
364   vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
365   vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
366   vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
367                                            dictionary_->text().size());
368   decoding_status_ = DECODING_IN_PROGRESS;
369   return FILTER_OK;
370 }
371 
OutputBufferExcess(char * const dest_buffer,size_t available_space)372 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
373                                    size_t available_space) {
374   if (dest_buffer_excess_.empty())
375     return 0;
376   DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
377   size_t amount = std::min(available_space,
378       dest_buffer_excess_.size() - dest_buffer_excess_index_);
379   memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
380          amount);
381   dest_buffer_excess_index_ += amount;
382   if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
383     DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
384     dest_buffer_excess_.clear();
385     dest_buffer_excess_index_ = 0;
386   }
387   return amount;
388 }
389 
390 }  // namespace net
391