• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "webkit/glue/multipart_response_delegate.h"
6 
7 #include "base/logging.h"
8 #include "base/string_number_conversions.h"
9 #include "base/string_util.h"
10 #include "net/base/net_util.h"
11 #include "net/http/http_util.h"
12 #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h"
13 #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h"
16 
17 using WebKit::WebHTTPHeaderVisitor;
18 using WebKit::WebString;
19 using WebKit::WebURLLoader;
20 using WebKit::WebURLLoaderClient;
21 using WebKit::WebURLResponse;
22 
23 namespace webkit_glue {
24 
25 namespace {
26 
// Response headers that are not copied from the original (outer) response
// when generating a WebURLResponse for a MIME payload.  HeaderCopier skips
// them, and ParseHeaders() fills them in from the part's own header block
// when the part supplies a non-empty value.
//
// Entries are lower-case ASCII because they are matched against incoming
// header names with LowerCaseEqualsASCII.  Both the strings and the table
// itself are immutable.
const char* const kReplaceHeaders[] = {
  "content-type",
  "content-length",
  "content-disposition",
  "content-range",
  "range",
  "set-cookie"
};
37 
38 class HeaderCopier : public WebHTTPHeaderVisitor {
39  public:
HeaderCopier(WebURLResponse * response)40   HeaderCopier(WebURLResponse* response)
41       : response_(response) {
42   }
visitHeader(const WebString & name,const WebString & value)43   virtual void visitHeader(const WebString& name, const WebString& value) {
44     const std::string& name_utf8 = name.utf8();
45     for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
46       if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
47         return;
48     }
49     response_->setHTTPHeaderField(name, value);
50   }
51  private:
52   WebURLResponse* response_;
53 };
54 
55 }  // namespace
56 
MultipartResponseDelegate(WebURLLoaderClient * client,WebURLLoader * loader,const WebURLResponse & response,const std::string & boundary)57 MultipartResponseDelegate::MultipartResponseDelegate(
58     WebURLLoaderClient* client,
59     WebURLLoader* loader,
60     const WebURLResponse& response,
61     const std::string& boundary)
62     : client_(client),
63       loader_(loader),
64       original_response_(response),
65       encoded_data_length_(0),
66       boundary_("--"),
67       first_received_data_(true),
68       processing_headers_(false),
69       stop_sending_(false),
70       has_sent_first_response_(false) {
71   // Some servers report a boundary prefixed with "--".  See bug 5786.
72   if (StartsWithASCII(boundary, "--", true)) {
73     boundary_.assign(boundary);
74   } else {
75     boundary_.append(boundary);
76   }
77 }
78 
OnReceivedData(const char * data,int data_len,int encoded_data_length)79 void MultipartResponseDelegate::OnReceivedData(const char* data,
80                                                int data_len,
81                                                int encoded_data_length) {
82   // stop_sending_ means that we've already received the final boundary token.
83   // The server should stop sending us data at this point, but if it does, we
84   // just throw it away.
85   if (stop_sending_)
86     return;
87 
88   data_.append(data, data_len);
89   encoded_data_length_ += encoded_data_length;
90   if (first_received_data_) {
91     // Some servers don't send a boundary token before the first chunk of
92     // data.  We handle this case anyway (Gecko does too).
93     first_received_data_ = false;
94 
95     // Eat leading \r\n
96     int pos = PushOverLine(data_, 0);
97     if (pos)
98       data_ = data_.substr(pos);
99 
100     if (data_.length() < boundary_.length() + 2) {
101       // We don't have enough data yet to make a boundary token.  Just wait
102       // until the next chunk of data arrives.
103       first_received_data_ = true;
104       return;
105     }
106 
107     if (0 != data_.compare(0, boundary_.length(), boundary_)) {
108       data_ = boundary_ + "\n" + data_;
109     }
110   }
111   DCHECK(!first_received_data_);
112 
113   // Headers
114   if (processing_headers_) {
115     // Eat leading \r\n
116     int pos = PushOverLine(data_, 0);
117     if (pos)
118       data_ = data_.substr(pos);
119 
120     if (ParseHeaders()) {
121       // Successfully parsed headers.
122       processing_headers_ = false;
123     } else {
124       // Get more data before trying again.
125       return;
126     }
127   }
128   DCHECK(!processing_headers_);
129 
130   size_t boundary_pos;
131   while ((boundary_pos = FindBoundary()) != std::string::npos) {
132     if (client_) {
133       // Strip out trailing \n\r characters in the buffer preceding the
134       // boundary on the same lines as Firefox.
135       size_t data_length = boundary_pos;
136       if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
137         data_length--;
138         if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
139           data_length--;
140         }
141       }
142       if (data_length > 0) {
143         // Send the last data chunk.
144         client_->didReceiveData(loader_,
145                                 data_.data(),
146                                 static_cast<int>(data_length),
147                                 encoded_data_length_);
148         encoded_data_length_ = 0;
149       }
150     }
151     size_t boundary_end_pos = boundary_pos + boundary_.length();
152     if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
153       // This was the last boundary so we can stop processing.
154       stop_sending_ = true;
155       data_.clear();
156       return;
157     }
158 
159     // We can now throw out data up through the boundary
160     int offset = PushOverLine(data_, boundary_end_pos);
161     data_ = data_.substr(boundary_end_pos + offset);
162 
163     // Ok, back to parsing headers
164     if (!ParseHeaders()) {
165       processing_headers_ = true;
166       break;
167     }
168   }
169 
170   // At this point, we should send over any data we have, but keep enough data
171   // buffered to handle a boundary that may have been truncated.
172   if (!processing_headers_ && data_.length() > boundary_.length()) {
173     // If the last character is a new line character, go ahead and just send
174     // everything we have buffered.  This matches an optimization in Gecko.
175     int send_length = data_.length() - boundary_.length();
176     if (data_[data_.length() - 1] == '\n')
177       send_length = data_.length();
178     if (client_)
179       client_->didReceiveData(loader_,
180                               data_.data(),
181                               send_length,
182                               encoded_data_length_);
183     data_ = data_.substr(send_length);
184     encoded_data_length_ = 0;
185   }
186 }
187 
OnCompletedRequest()188 void MultipartResponseDelegate::OnCompletedRequest() {
189   // If we have any pending data and we're not in a header, go ahead and send
190   // it to WebCore.
191   if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
192     client_->didReceiveData(loader_,
193                             data_.data(),
194                             static_cast<int>(data_.length()),
195                             encoded_data_length_);
196     encoded_data_length_ = 0;
197   }
198 }
199 
PushOverLine(const std::string & data,size_t pos)200 int MultipartResponseDelegate::PushOverLine(const std::string& data,
201                                             size_t pos) {
202   int offset = 0;
203   if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
204     ++offset;
205     if (pos + 1 < data.length() && data[pos + 1] == '\n')
206       ++offset;
207   }
208   return offset;
209 }
210 
ParseHeaders()211 bool MultipartResponseDelegate::ParseHeaders() {
212   int line_feed_increment = 1;
213 
214   // Grab the headers being liberal about line endings.
215   size_t line_start_pos = 0;
216   size_t line_end_pos = data_.find('\n');
217   while (line_end_pos != std::string::npos) {
218     // Handle CRLF
219     if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') {
220       line_feed_increment = 2;
221       --line_end_pos;
222     } else {
223       line_feed_increment = 1;
224     }
225     if (line_start_pos == line_end_pos) {
226       // A blank line, end of headers
227       line_end_pos += line_feed_increment;
228       break;
229     }
230     // Find the next header line.
231     line_start_pos = line_end_pos + line_feed_increment;
232     line_end_pos = data_.find('\n', line_start_pos);
233   }
234   // Truncated in the middle of a header, stop parsing.
235   if (line_end_pos == std::string::npos)
236     return false;
237 
238   // Eat headers
239   std::string headers("\n");
240   headers.append(data_, 0, line_end_pos);
241   data_ = data_.substr(line_end_pos);
242 
243   // Create a WebURLResponse based on the original set of headers + the
244   // replacement headers.  We only replace the same few headers that gecko
245   // does.  See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
246   std::string content_type = net::GetSpecificHeader(headers, "content-type");
247   std::string mime_type;
248   std::string charset;
249   bool has_charset = false;
250   net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
251                                   &has_charset);
252   WebURLResponse response(original_response_.url());
253   response.setMIMEType(WebString::fromUTF8(mime_type));
254   response.setTextEncodingName(WebString::fromUTF8(charset));
255 
256   HeaderCopier copier(&response);
257   original_response_.visitHTTPHeaderFields(&copier);
258 
259   for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
260     std::string name(kReplaceHeaders[i]);
261     std::string value = net::GetSpecificHeader(headers, name);
262     if (!value.empty()) {
263       response.setHTTPHeaderField(WebString::fromUTF8(name),
264                                   WebString::fromUTF8(value));
265     }
266   }
267   // To avoid recording every multipart load as a separate visit in
268   // the history database, we want to keep track of whether the response
269   // is part of a multipart payload.  We do want to record the first visit,
270   // so we only set isMultipartPayload to true after the first visit.
271   response.setIsMultipartPayload(has_sent_first_response_);
272   has_sent_first_response_ = true;
273   // Send the response!
274   if (client_)
275     client_->didReceiveResponse(loader_, response);
276 
277   return true;
278 }
279 
280 // Boundaries are supposed to be preceeded with --, but it looks like gecko
281 // doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
FindBoundary()282 size_t MultipartResponseDelegate::FindBoundary() {
283   size_t boundary_pos = data_.find(boundary_);
284   if (boundary_pos != std::string::npos) {
285     // Back up over -- for backwards compat
286     // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
287     // to care.
288     if (boundary_pos >= 2) {
289       if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
290         boundary_pos -= 2;
291         boundary_ = "--" + boundary_;
292       }
293     }
294   }
295   return boundary_pos;
296 }
297 
ReadMultipartBoundary(const WebURLResponse & response,std::string * multipart_boundary)298 bool MultipartResponseDelegate::ReadMultipartBoundary(
299     const WebURLResponse& response,
300     std::string* multipart_boundary) {
301   std::string content_type =
302       response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
303 
304   size_t boundary_start_offset = content_type.find("boundary=");
305   if (boundary_start_offset == std::string::npos)
306     return false;
307 
308   boundary_start_offset += strlen("boundary=");
309 
310   size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
311 
312   if (boundary_end_offset == std::string::npos)
313     boundary_end_offset = content_type.length();
314 
315   size_t boundary_length = boundary_end_offset - boundary_start_offset;
316 
317   *multipart_boundary =
318       content_type.substr(boundary_start_offset, boundary_length);
319   // The byte range response can have quoted boundary strings. This is legal
320   // as per MIME specifications. Individual data fragements however don't
321   // contain quoted boundary strings.
322   TrimString(*multipart_boundary, "\"", multipart_boundary);
323   return true;
324 }
325 
ReadContentRanges(const WebURLResponse & response,int * content_range_lower_bound,int * content_range_upper_bound,int * content_range_instance_size)326 bool MultipartResponseDelegate::ReadContentRanges(
327     const WebURLResponse& response,
328     int* content_range_lower_bound,
329     int* content_range_upper_bound,
330     int* content_range_instance_size) {
331 
332   std::string content_range = response.httpHeaderField("Content-Range").utf8();
333   if (content_range.empty()) {
334     content_range = response.httpHeaderField("Range").utf8();
335   }
336 
337   if (content_range.empty()) {
338     DLOG(WARNING) << "Failed to read content range from response.";
339     return false;
340   }
341 
342   size_t byte_range_lower_bound_start_offset = content_range.find(" ");
343   if (byte_range_lower_bound_start_offset == std::string::npos) {
344     return false;
345   }
346 
347   // Skip over the initial space.
348   byte_range_lower_bound_start_offset++;
349 
350   // Find the lower bound.
351   size_t byte_range_lower_bound_end_offset =
352       content_range.find("-", byte_range_lower_bound_start_offset);
353   if (byte_range_lower_bound_end_offset == std::string::npos) {
354     return false;
355   }
356 
357   size_t byte_range_lower_bound_characters =
358       byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
359   std::string byte_range_lower_bound =
360       content_range.substr(byte_range_lower_bound_start_offset,
361                            byte_range_lower_bound_characters);
362 
363   // Find the upper bound.
364   size_t byte_range_upper_bound_start_offset =
365       byte_range_lower_bound_end_offset + 1;
366 
367   size_t byte_range_upper_bound_end_offset =
368       content_range.find("/", byte_range_upper_bound_start_offset);
369   if (byte_range_upper_bound_end_offset == std::string::npos) {
370     return false;
371   }
372 
373   size_t byte_range_upper_bound_characters =
374       byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
375   std::string byte_range_upper_bound =
376       content_range.substr(byte_range_upper_bound_start_offset,
377                            byte_range_upper_bound_characters);
378 
379   // Find the instance size.
380   size_t byte_range_instance_size_start_offset =
381       byte_range_upper_bound_end_offset + 1;
382 
383   size_t byte_range_instance_size_end_offset =
384       content_range.length();
385 
386   size_t byte_range_instance_size_characters =
387       byte_range_instance_size_end_offset -
388       byte_range_instance_size_start_offset;
389   std::string byte_range_instance_size =
390       content_range.substr(byte_range_instance_size_start_offset,
391                            byte_range_instance_size_characters);
392 
393   if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound))
394     return false;
395   if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound))
396     return false;
397   if (!base::StringToInt(byte_range_instance_size, content_range_instance_size))
398     return false;
399   return true;
400 }
401 
402 }  // namespace webkit_glue
403