1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "webkit/glue/multipart_response_delegate.h"
6
7 #include "base/logging.h"
8 #include "base/string_number_conversions.h"
9 #include "base/string_util.h"
10 #include "net/base/net_util.h"
11 #include "net/http/http_util.h"
12 #include "third_party/WebKit/Source/WebKit/chromium/public/WebHTTPHeaderVisitor.h"
13 #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h"
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLLoaderClient.h"
16
17 using WebKit::WebHTTPHeaderVisitor;
18 using WebKit::WebString;
19 using WebKit::WebURLLoader;
20 using WebKit::WebURLLoaderClient;
21 using WebKit::WebURLResponse;
22
23 namespace webkit_glue {
24
25 namespace {
26
// Response headers that are NOT inherited from the original (outer) response
// when a WebURLResponse is generated for a MIME payload; each part supplies
// its own value for these instead. Entries are written in lowercase and are
// matched case-insensitively by the code in this file. Both the pointers and
// the characters they point at are const: the table is never modified.
const char* const kReplaceHeaders[] = {
  "content-type",
  "content-length",
  "content-disposition",
  "content-range",
  "range",
  "set-cookie"
};
37
38 class HeaderCopier : public WebHTTPHeaderVisitor {
39 public:
HeaderCopier(WebURLResponse * response)40 HeaderCopier(WebURLResponse* response)
41 : response_(response) {
42 }
visitHeader(const WebString & name,const WebString & value)43 virtual void visitHeader(const WebString& name, const WebString& value) {
44 const std::string& name_utf8 = name.utf8();
45 for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
46 if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
47 return;
48 }
49 response_->setHTTPHeaderField(name, value);
50 }
51 private:
52 WebURLResponse* response_;
53 };
54
55 } // namespace
56
MultipartResponseDelegate(WebURLLoaderClient * client,WebURLLoader * loader,const WebURLResponse & response,const std::string & boundary)57 MultipartResponseDelegate::MultipartResponseDelegate(
58 WebURLLoaderClient* client,
59 WebURLLoader* loader,
60 const WebURLResponse& response,
61 const std::string& boundary)
62 : client_(client),
63 loader_(loader),
64 original_response_(response),
65 encoded_data_length_(0),
66 boundary_("--"),
67 first_received_data_(true),
68 processing_headers_(false),
69 stop_sending_(false),
70 has_sent_first_response_(false) {
71 // Some servers report a boundary prefixed with "--". See bug 5786.
72 if (StartsWithASCII(boundary, "--", true)) {
73 boundary_.assign(boundary);
74 } else {
75 boundary_.append(boundary);
76 }
77 }
78
OnReceivedData(const char * data,int data_len,int encoded_data_length)79 void MultipartResponseDelegate::OnReceivedData(const char* data,
80 int data_len,
81 int encoded_data_length) {
82 // stop_sending_ means that we've already received the final boundary token.
83 // The server should stop sending us data at this point, but if it does, we
84 // just throw it away.
85 if (stop_sending_)
86 return;
87
88 data_.append(data, data_len);
89 encoded_data_length_ += encoded_data_length;
90 if (first_received_data_) {
91 // Some servers don't send a boundary token before the first chunk of
92 // data. We handle this case anyway (Gecko does too).
93 first_received_data_ = false;
94
95 // Eat leading \r\n
96 int pos = PushOverLine(data_, 0);
97 if (pos)
98 data_ = data_.substr(pos);
99
100 if (data_.length() < boundary_.length() + 2) {
101 // We don't have enough data yet to make a boundary token. Just wait
102 // until the next chunk of data arrives.
103 first_received_data_ = true;
104 return;
105 }
106
107 if (0 != data_.compare(0, boundary_.length(), boundary_)) {
108 data_ = boundary_ + "\n" + data_;
109 }
110 }
111 DCHECK(!first_received_data_);
112
113 // Headers
114 if (processing_headers_) {
115 // Eat leading \r\n
116 int pos = PushOverLine(data_, 0);
117 if (pos)
118 data_ = data_.substr(pos);
119
120 if (ParseHeaders()) {
121 // Successfully parsed headers.
122 processing_headers_ = false;
123 } else {
124 // Get more data before trying again.
125 return;
126 }
127 }
128 DCHECK(!processing_headers_);
129
130 size_t boundary_pos;
131 while ((boundary_pos = FindBoundary()) != std::string::npos) {
132 if (client_) {
133 // Strip out trailing \n\r characters in the buffer preceding the
134 // boundary on the same lines as Firefox.
135 size_t data_length = boundary_pos;
136 if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
137 data_length--;
138 if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
139 data_length--;
140 }
141 }
142 if (data_length > 0) {
143 // Send the last data chunk.
144 client_->didReceiveData(loader_,
145 data_.data(),
146 static_cast<int>(data_length),
147 encoded_data_length_);
148 encoded_data_length_ = 0;
149 }
150 }
151 size_t boundary_end_pos = boundary_pos + boundary_.length();
152 if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
153 // This was the last boundary so we can stop processing.
154 stop_sending_ = true;
155 data_.clear();
156 return;
157 }
158
159 // We can now throw out data up through the boundary
160 int offset = PushOverLine(data_, boundary_end_pos);
161 data_ = data_.substr(boundary_end_pos + offset);
162
163 // Ok, back to parsing headers
164 if (!ParseHeaders()) {
165 processing_headers_ = true;
166 break;
167 }
168 }
169
170 // At this point, we should send over any data we have, but keep enough data
171 // buffered to handle a boundary that may have been truncated.
172 if (!processing_headers_ && data_.length() > boundary_.length()) {
173 // If the last character is a new line character, go ahead and just send
174 // everything we have buffered. This matches an optimization in Gecko.
175 int send_length = data_.length() - boundary_.length();
176 if (data_[data_.length() - 1] == '\n')
177 send_length = data_.length();
178 if (client_)
179 client_->didReceiveData(loader_,
180 data_.data(),
181 send_length,
182 encoded_data_length_);
183 data_ = data_.substr(send_length);
184 encoded_data_length_ = 0;
185 }
186 }
187
OnCompletedRequest()188 void MultipartResponseDelegate::OnCompletedRequest() {
189 // If we have any pending data and we're not in a header, go ahead and send
190 // it to WebCore.
191 if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
192 client_->didReceiveData(loader_,
193 data_.data(),
194 static_cast<int>(data_.length()),
195 encoded_data_length_);
196 encoded_data_length_ = 0;
197 }
198 }
199
PushOverLine(const std::string & data,size_t pos)200 int MultipartResponseDelegate::PushOverLine(const std::string& data,
201 size_t pos) {
202 int offset = 0;
203 if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
204 ++offset;
205 if (pos + 1 < data.length() && data[pos + 1] == '\n')
206 ++offset;
207 }
208 return offset;
209 }
210
ParseHeaders()211 bool MultipartResponseDelegate::ParseHeaders() {
212 int line_feed_increment = 1;
213
214 // Grab the headers being liberal about line endings.
215 size_t line_start_pos = 0;
216 size_t line_end_pos = data_.find('\n');
217 while (line_end_pos != std::string::npos) {
218 // Handle CRLF
219 if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') {
220 line_feed_increment = 2;
221 --line_end_pos;
222 } else {
223 line_feed_increment = 1;
224 }
225 if (line_start_pos == line_end_pos) {
226 // A blank line, end of headers
227 line_end_pos += line_feed_increment;
228 break;
229 }
230 // Find the next header line.
231 line_start_pos = line_end_pos + line_feed_increment;
232 line_end_pos = data_.find('\n', line_start_pos);
233 }
234 // Truncated in the middle of a header, stop parsing.
235 if (line_end_pos == std::string::npos)
236 return false;
237
238 // Eat headers
239 std::string headers("\n");
240 headers.append(data_, 0, line_end_pos);
241 data_ = data_.substr(line_end_pos);
242
243 // Create a WebURLResponse based on the original set of headers + the
244 // replacement headers. We only replace the same few headers that gecko
245 // does. See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
246 std::string content_type = net::GetSpecificHeader(headers, "content-type");
247 std::string mime_type;
248 std::string charset;
249 bool has_charset = false;
250 net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
251 &has_charset);
252 WebURLResponse response(original_response_.url());
253 response.setMIMEType(WebString::fromUTF8(mime_type));
254 response.setTextEncodingName(WebString::fromUTF8(charset));
255
256 HeaderCopier copier(&response);
257 original_response_.visitHTTPHeaderFields(&copier);
258
259 for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
260 std::string name(kReplaceHeaders[i]);
261 std::string value = net::GetSpecificHeader(headers, name);
262 if (!value.empty()) {
263 response.setHTTPHeaderField(WebString::fromUTF8(name),
264 WebString::fromUTF8(value));
265 }
266 }
267 // To avoid recording every multipart load as a separate visit in
268 // the history database, we want to keep track of whether the response
269 // is part of a multipart payload. We do want to record the first visit,
270 // so we only set isMultipartPayload to true after the first visit.
271 response.setIsMultipartPayload(has_sent_first_response_);
272 has_sent_first_response_ = true;
273 // Send the response!
274 if (client_)
275 client_->didReceiveResponse(loader_, response);
276
277 return true;
278 }
279
280 // Boundaries are supposed to be preceeded with --, but it looks like gecko
281 // doesn't require the dashes to exist. See nsMultiMixedConv::FindToken.
FindBoundary()282 size_t MultipartResponseDelegate::FindBoundary() {
283 size_t boundary_pos = data_.find(boundary_);
284 if (boundary_pos != std::string::npos) {
285 // Back up over -- for backwards compat
286 // TODO(tc): Don't we only want to do this once? Gecko code doesn't seem
287 // to care.
288 if (boundary_pos >= 2) {
289 if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
290 boundary_pos -= 2;
291 boundary_ = "--" + boundary_;
292 }
293 }
294 }
295 return boundary_pos;
296 }
297
ReadMultipartBoundary(const WebURLResponse & response,std::string * multipart_boundary)298 bool MultipartResponseDelegate::ReadMultipartBoundary(
299 const WebURLResponse& response,
300 std::string* multipart_boundary) {
301 std::string content_type =
302 response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
303
304 size_t boundary_start_offset = content_type.find("boundary=");
305 if (boundary_start_offset == std::string::npos)
306 return false;
307
308 boundary_start_offset += strlen("boundary=");
309
310 size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
311
312 if (boundary_end_offset == std::string::npos)
313 boundary_end_offset = content_type.length();
314
315 size_t boundary_length = boundary_end_offset - boundary_start_offset;
316
317 *multipart_boundary =
318 content_type.substr(boundary_start_offset, boundary_length);
319 // The byte range response can have quoted boundary strings. This is legal
320 // as per MIME specifications. Individual data fragements however don't
321 // contain quoted boundary strings.
322 TrimString(*multipart_boundary, "\"", multipart_boundary);
323 return true;
324 }
325
ReadContentRanges(const WebURLResponse & response,int * content_range_lower_bound,int * content_range_upper_bound,int * content_range_instance_size)326 bool MultipartResponseDelegate::ReadContentRanges(
327 const WebURLResponse& response,
328 int* content_range_lower_bound,
329 int* content_range_upper_bound,
330 int* content_range_instance_size) {
331
332 std::string content_range = response.httpHeaderField("Content-Range").utf8();
333 if (content_range.empty()) {
334 content_range = response.httpHeaderField("Range").utf8();
335 }
336
337 if (content_range.empty()) {
338 DLOG(WARNING) << "Failed to read content range from response.";
339 return false;
340 }
341
342 size_t byte_range_lower_bound_start_offset = content_range.find(" ");
343 if (byte_range_lower_bound_start_offset == std::string::npos) {
344 return false;
345 }
346
347 // Skip over the initial space.
348 byte_range_lower_bound_start_offset++;
349
350 // Find the lower bound.
351 size_t byte_range_lower_bound_end_offset =
352 content_range.find("-", byte_range_lower_bound_start_offset);
353 if (byte_range_lower_bound_end_offset == std::string::npos) {
354 return false;
355 }
356
357 size_t byte_range_lower_bound_characters =
358 byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
359 std::string byte_range_lower_bound =
360 content_range.substr(byte_range_lower_bound_start_offset,
361 byte_range_lower_bound_characters);
362
363 // Find the upper bound.
364 size_t byte_range_upper_bound_start_offset =
365 byte_range_lower_bound_end_offset + 1;
366
367 size_t byte_range_upper_bound_end_offset =
368 content_range.find("/", byte_range_upper_bound_start_offset);
369 if (byte_range_upper_bound_end_offset == std::string::npos) {
370 return false;
371 }
372
373 size_t byte_range_upper_bound_characters =
374 byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
375 std::string byte_range_upper_bound =
376 content_range.substr(byte_range_upper_bound_start_offset,
377 byte_range_upper_bound_characters);
378
379 // Find the instance size.
380 size_t byte_range_instance_size_start_offset =
381 byte_range_upper_bound_end_offset + 1;
382
383 size_t byte_range_instance_size_end_offset =
384 content_range.length();
385
386 size_t byte_range_instance_size_characters =
387 byte_range_instance_size_end_offset -
388 byte_range_instance_size_start_offset;
389 std::string byte_range_instance_size =
390 content_range.substr(byte_range_instance_size_start_offset,
391 byte_range_instance_size_characters);
392
393 if (!base::StringToInt(byte_range_lower_bound, content_range_lower_bound))
394 return false;
395 if (!base::StringToInt(byte_range_upper_bound, content_range_upper_bound))
396 return false;
397 if (!base::StringToInt(byte_range_instance_size, content_range_instance_size))
398 return false;
399 return true;
400 }
401
402 } // namespace webkit_glue
403