• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Derived from:
6 //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7 // The license block is:
8 /* ***** BEGIN LICENSE BLOCK *****
9  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10  *
11  * The contents of this file are subject to the Mozilla Public License Version
12  * 1.1 (the "License"); you may not use this file except in compliance with
13  * the License. You may obtain a copy of the License at
14  * http://www.mozilla.org/MPL/
15  *
16  * Software distributed under the License is distributed on an "AS IS" basis,
17  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18  * for the specific language governing rights and limitations under the
19  * License.
20  *
21  * The Original Code is Mozilla.
22  *
23  * The Initial Developer of the Original Code is
24  * Netscape Communications.
25  * Portions created by the Initial Developer are Copyright (C) 2001
26  * the Initial Developer. All Rights Reserved.
27  *
28  * Contributor(s):
29  *   Darin Fisher <darin@netscape.com> (original author)
30  *
31  * Alternatively, the contents of this file may be used under the terms of
32  * either the GNU General Public License Version 2 or later (the "GPL"), or
33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34  * in which case the provisions of the GPL or the LGPL are applicable instead
35  * of those above. If you wish to allow use of your version of this file only
36  * under the terms of either the GPL or the LGPL, and not to allow others to
37  * use your version of this file under the terms of the MPL, indicate your
38  * decision by deleting the provisions above and replace them with the notice
39  * and other provisions required by the GPL or the LGPL. If you do not delete
40  * the provisions above, a recipient may use your version of this file under
41  * the terms of any one of the MPL, the GPL or the LGPL.
42  *
43  * ***** END LICENSE BLOCK ***** */
44 
45 #include "net/http/http_chunked_decoder.h"
46 
47 #include "base/logging.h"
48 #include "base/string_number_conversions.h"
49 #include "base/string_piece.h"
50 #include "base/string_util.h"
51 #include "net/base/net_errors.h"
52 
53 namespace net {
54 
HttpChunkedDecoder()55 HttpChunkedDecoder::HttpChunkedDecoder()
56     : chunk_remaining_(0),
57       chunk_terminator_remaining_(false),
58       reached_last_chunk_(false),
59       reached_eof_(false),
60       bytes_after_eof_(0) {
61 }
62 
FilterBuf(char * buf,int buf_len)63 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
64   int result = 0;
65 
66   while (buf_len) {
67     if (chunk_remaining_) {
68       int num = std::min(chunk_remaining_, buf_len);
69 
70       buf_len -= num;
71       chunk_remaining_ -= num;
72 
73       result += num;
74       buf += num;
75 
76       // After each chunk's data there should be a CRLF
77       if (!chunk_remaining_)
78         chunk_terminator_remaining_ = true;
79       continue;
80     } else if (reached_eof_) {
81       bytes_after_eof_ += buf_len;
82       break;  // Done!
83     }
84 
85     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
86     if (bytes_consumed < 0)
87       return bytes_consumed; // Error
88 
89     buf_len -= bytes_consumed;
90     if (buf_len)
91       memmove(buf, buf + bytes_consumed, buf_len);
92   }
93 
94   return result;
95 }
96 
ScanForChunkRemaining(const char * buf,int buf_len)97 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
98   DCHECK(chunk_remaining_ == 0);
99   DCHECK(buf_len > 0);
100 
101   int bytes_consumed = 0;
102 
103   size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
104   if (index_of_lf != base::StringPiece::npos) {
105     buf_len = static_cast<int>(index_of_lf);
106     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
107       buf_len--;
108     bytes_consumed = static_cast<int>(index_of_lf) + 1;
109 
110     // Make buf point to the full line buffer to parse.
111     if (!line_buf_.empty()) {
112       line_buf_.append(buf, buf_len);
113       buf = line_buf_.data();
114       buf_len = static_cast<int>(line_buf_.size());
115     }
116 
117     if (reached_last_chunk_) {
118       if (buf_len)
119         DVLOG(1) << "ignoring http trailer";
120       else
121         reached_eof_ = true;
122     } else if (chunk_terminator_remaining_) {
123       if (buf_len) {
124         DLOG(ERROR) << "chunk data not terminated properly";
125         return ERR_INVALID_CHUNKED_ENCODING;
126       }
127       chunk_terminator_remaining_ = false;
128     } else if (buf_len) {
129       // Ignore any chunk-extensions.
130       size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
131       if (index_of_semicolon != base::StringPiece::npos)
132         buf_len = static_cast<int>(index_of_semicolon);
133 
134       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
135         DLOG(ERROR) << "Failed parsing HEX from: " <<
136             std::string(buf, buf_len);
137         return ERR_INVALID_CHUNKED_ENCODING;
138       }
139 
140       if (chunk_remaining_ == 0)
141         reached_last_chunk_ = true;
142     } else {
143       DLOG(ERROR) << "missing chunk-size";
144       return ERR_INVALID_CHUNKED_ENCODING;
145     }
146     line_buf_.clear();
147   } else {
148     // Save the partial line; wait for more data.
149     bytes_consumed = buf_len;
150 
151     // Ignore a trailing CR
152     if (buf[buf_len - 1] == '\r')
153       buf_len--;
154 
155     line_buf_.append(buf, buf_len);
156   }
157   return bytes_consumed;
158 }
159 
160 
161 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
162 // some sites rely on more lenient parsing.
163 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
164 // (0x20) to be 7 characters long, such as "819b   ".
165 //
166 // A comparison of browsers running on WindowsXP shows that
167 // they will parse the following inputs (egrep syntax):
168 //
169 // Let \X be the character class for a hex digit: [0-9a-fA-F]
170 //
171 //   RFC 2616: ^\X+$
172 //        IE7: ^\X+[^\X]*$
173 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
174 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
175 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
176 //
177 // Our strategy is to be as strict as possible, while not breaking
178 // known sites.
179 //
180 //         Us: ^\X+[ ]*$
ParseChunkSize(const char * start,int len,int * out)181 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
182   DCHECK(len >= 0);
183 
184   // Strip trailing spaces
185   while (len && start[len - 1] == ' ')
186     len--;
187 
188   // Be more restrictive than HexStringToInt;
189   // don't allow inputs with leading "-", "+", "0x", "0X"
190   if (base::StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF")
191       != base::StringPiece::npos)
192     return false;
193 
194   int parsed_number;
195   bool ok = base::HexStringToInt(start, start + len, &parsed_number);
196   if (ok && parsed_number >= 0) {
197     *out = parsed_number;
198     return true;
199   }
200   return false;
201 }
202 
203 }  // namespace net
204