1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Derived from:
6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7 // The license block is:
8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 *
11 * The contents of this file are subject to the Mozilla Public License Version
12 * 1.1 (the "License"); you may not use this file except in compliance with
13 * the License. You may obtain a copy of the License at
14 * http://www.mozilla.org/MPL/
15 *
16 * Software distributed under the License is distributed on an "AS IS" basis,
17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18 * for the specific language governing rights and limitations under the
19 * License.
20 *
21 * The Original Code is Mozilla.
22 *
23 * The Initial Developer of the Original Code is
24 * Netscape Communications.
25 * Portions created by the Initial Developer are Copyright (C) 2001
26 * the Initial Developer. All Rights Reserved.
27 *
28 * Contributor(s):
29 * Darin Fisher <darin@netscape.com> (original author)
30 *
31 * Alternatively, the contents of this file may be used under the terms of
32 * either the GNU General Public License Version 2 or later (the "GPL"), or
33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34 * in which case the provisions of the GPL or the LGPL are applicable instead
35 * of those above. If you wish to allow use of your version of this file only
36 * under the terms of either the GPL or the LGPL, and not to allow others to
37 * use your version of this file under the terms of the MPL, indicate your
38 * decision by deleting the provisions above and replace them with the notice
39 * and other provisions required by the GPL or the LGPL. If you do not delete
40 * the provisions above, a recipient may use your version of this file under
41 * the terms of any one of the MPL, the GPL or the LGPL.
42 *
43 * ***** END LICENSE BLOCK ***** */
44
45 #include "net/http/http_chunked_decoder.h"
46
47 #include <algorithm>
48
49 #include "base/logging.h"
50 #include "base/strings/string_number_conversions.h"
51 #include "base/strings/string_piece.h"
52 #include "base/strings/string_util.h"
53 #include "net/base/net_errors.h"
54
55 namespace net {
56
57 // Absurdly long size to avoid imposing a constraint on chunked encoding
58 // extensions.
59 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
60
61 HttpChunkedDecoder::HttpChunkedDecoder() = default;
62
FilterBuf(char * buf,int buf_len)63 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
64 int result = 0;
65
66 while (buf_len > 0) {
67 if (chunk_remaining_ > 0) {
68 // Since |chunk_remaining_| is positive and |buf_len| an int, the minimum
69 // of the two must be an int.
70 int num = static_cast<int>(
71 std::min(chunk_remaining_, static_cast<int64_t>(buf_len)));
72
73 buf_len -= num;
74 chunk_remaining_ -= num;
75
76 result += num;
77 buf += num;
78
79 // After each chunk's data there should be a CRLF.
80 if (chunk_remaining_ == 0)
81 chunk_terminator_remaining_ = true;
82 continue;
83 } else if (reached_eof_) {
84 bytes_after_eof_ += buf_len;
85 break; // Done!
86 }
87
88 int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
89 if (bytes_consumed < 0)
90 return bytes_consumed; // Error
91
92 buf_len -= bytes_consumed;
93 if (buf_len > 0)
94 memmove(buf, buf + bytes_consumed, buf_len);
95 }
96
97 return result;
98 }
99
ScanForChunkRemaining(const char * buf,int buf_len)100 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
101 DCHECK_EQ(0, chunk_remaining_);
102 DCHECK_GT(buf_len, 0);
103
104 int bytes_consumed = 0;
105
106 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
107 if (index_of_lf != base::StringPiece::npos) {
108 buf_len = static_cast<int>(index_of_lf);
109 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR.
110 buf_len--;
111 bytes_consumed = static_cast<int>(index_of_lf) + 1;
112
113 // Make buf point to the full line buffer to parse.
114 if (!line_buf_.empty()) {
115 line_buf_.append(buf, buf_len);
116 buf = line_buf_.data();
117 buf_len = static_cast<int>(line_buf_.size());
118 }
119
120 if (reached_last_chunk_) {
121 if (buf_len > 0)
122 DVLOG(1) << "ignoring http trailer";
123 else
124 reached_eof_ = true;
125 } else if (chunk_terminator_remaining_) {
126 if (buf_len > 0) {
127 DLOG(ERROR) << "chunk data not terminated properly";
128 return ERR_INVALID_CHUNKED_ENCODING;
129 }
130 chunk_terminator_remaining_ = false;
131 } else if (buf_len > 0) {
132 // Ignore any chunk-extensions.
133 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
134 if (index_of_semicolon != base::StringPiece::npos)
135 buf_len = static_cast<int>(index_of_semicolon);
136
137 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
138 DLOG(ERROR) << "Failed parsing HEX from: " <<
139 std::string(buf, buf_len);
140 return ERR_INVALID_CHUNKED_ENCODING;
141 }
142
143 if (chunk_remaining_ == 0)
144 reached_last_chunk_ = true;
145 } else {
146 DLOG(ERROR) << "missing chunk-size";
147 return ERR_INVALID_CHUNKED_ENCODING;
148 }
149 line_buf_.clear();
150 } else {
151 // Save the partial line; wait for more data.
152 bytes_consumed = buf_len;
153
154 // Ignore a trailing CR
155 if (buf[buf_len - 1] == '\r')
156 buf_len--;
157
158 if (line_buf_.length() + buf_len > kMaxLineBufLen) {
159 DLOG(ERROR) << "Chunked line length too long";
160 return ERR_INVALID_CHUNKED_ENCODING;
161 }
162
163 line_buf_.append(buf, buf_len);
164 }
165 return bytes_consumed;
166 }
167
168
169 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
170 // some sites rely on more lenient parsing.
171 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
172 // (0x20) to be 7 characters long, such as "819b ".
173 //
174 // A comparison of browsers running on WindowsXP shows that
175 // they will parse the following inputs (egrep syntax):
176 //
177 // Let \X be the character class for a hex digit: [0-9a-fA-F]
178 //
179 // RFC 7230: ^\X+$
180 // IE7: ^\X+[^\X]*$
181 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
182 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
183 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
184 //
185 // Our strategy is to be as strict as possible, while not breaking
186 // known sites.
187 //
188 // Us: ^\X+[ ]*$
ParseChunkSize(const char * start,int len,int64_t * out)189 bool HttpChunkedDecoder::ParseChunkSize(const char* start,
190 int len,
191 int64_t* out) {
192 DCHECK_GE(len, 0);
193
194 // Strip trailing spaces
195 while (len > 0 && start[len - 1] == ' ')
196 len--;
197
198 // Be more restrictive than HexStringToInt64;
199 // don't allow inputs with leading "-", "+", "0x", "0X"
200 base::StringPiece chunk_size(start, len);
201 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF")
202 != base::StringPiece::npos) {
203 return false;
204 }
205
206 int64_t parsed_number;
207 bool ok = base::HexStringToInt64(chunk_size, &parsed_number);
208 if (ok && parsed_number >= 0) {
209 *out = parsed_number;
210 return true;
211 }
212 return false;
213 }
214
215 } // namespace net
216