• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Derived from:
6 //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7 // The license block is:
8 /* ***** BEGIN LICENSE BLOCK *****
9  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10  *
11  * The contents of this file are subject to the Mozilla Public License Version
12  * 1.1 (the "License"); you may not use this file except in compliance with
13  * the License. You may obtain a copy of the License at
14  * http://www.mozilla.org/MPL/
15  *
16  * Software distributed under the License is distributed on an "AS IS" basis,
17  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18  * for the specific language governing rights and limitations under the
19  * License.
20  *
21  * The Original Code is Mozilla.
22  *
23  * The Initial Developer of the Original Code is
24  * Netscape Communications.
25  * Portions created by the Initial Developer are Copyright (C) 2001
26  * the Initial Developer. All Rights Reserved.
27  *
28  * Contributor(s):
29  *   Darin Fisher <darin@netscape.com> (original author)
30  *
31  * Alternatively, the contents of this file may be used under the terms of
32  * either the GNU General Public License Version 2 or later (the "GPL"), or
33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34  * in which case the provisions of the GPL or the LGPL are applicable instead
35  * of those above. If you wish to allow use of your version of this file only
36  * under the terms of either the GPL or the LGPL, and not to allow others to
37  * use your version of this file under the terms of the MPL, indicate your
38  * decision by deleting the provisions above and replace them with the notice
39  * and other provisions required by the GPL or the LGPL. If you do not delete
40  * the provisions above, a recipient may use your version of this file under
41  * the terms of any one of the MPL, the GPL or the LGPL.
42  *
43  * ***** END LICENSE BLOCK ***** */
44 
45 #include "net/http/http_chunked_decoder.h"
46 
47 #include <algorithm>
48 #include <string_view>
49 
50 #include "base/logging.h"
51 #include "base/numerics/safe_conversions.h"
52 #include "base/ranges/algorithm.h"
53 #include "base/strings/string_number_conversions.h"
54 #include "base/strings/string_util.h"
55 #include "net/base/net_errors.h"
56 
57 namespace net {
58 
59 // Absurdly long size to avoid imposing a constraint on chunked encoding
60 // extensions.
61 const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
62 
63 HttpChunkedDecoder::HttpChunkedDecoder() = default;
64 
FilterBuf(base::span<uint8_t> buf)65 int HttpChunkedDecoder::FilterBuf(base::span<uint8_t> buf) {
66   size_t result = 0;
67   while (buf.size() > 0) {
68     if (chunk_remaining_ > 0) {
69       size_t num =
70           std::min(base::saturated_cast<size_t>(chunk_remaining_), buf.size());
71 
72       chunk_remaining_ -= num;
73       result += num;
74       buf = buf.subspan(num);
75 
76       // After each chunk's data there should be a CRLF.
77       if (chunk_remaining_ == 0)
78         chunk_terminator_remaining_ = true;
79       continue;
80     } else if (reached_eof_) {
81       bytes_after_eof_ += buf.size();
82       break;  // Done!
83     }
84 
85     int bytes_consumed = ScanForChunkRemaining(buf);
86     if (bytes_consumed < 0)
87       return bytes_consumed; // Error
88 
89     base::span<const uint8_t> subspan =
90         buf.subspan(base::checked_cast<size_t>(bytes_consumed));
91     if (!subspan.empty()) {
92       buf.copy_prefix_from(subspan);
93     }
94     buf = buf.first(subspan.size());
95   }
96   // TODO(Kelsen): the return type should become size_t.
97   return base::checked_cast<int>(result);
98 }
99 
ScanForChunkRemaining(base::span<const uint8_t> buf)100 int HttpChunkedDecoder::ScanForChunkRemaining(base::span<const uint8_t> buf) {
101   int bytes_consumed = 0;
102 
103   size_t index_of_lf = base::as_string_view(buf).find('\n');
104   if (index_of_lf != std::string_view::npos) {
105     buf = buf.first(index_of_lf);
106     // Eliminate a preceding CR.
107     if (!buf.empty() && buf.back() == '\r') {
108       buf = buf.first(buf.size() - 1u);
109     }
110     bytes_consumed = static_cast<int>(index_of_lf) + 1;
111 
112     // Make buf point to the full line buffer to parse.
113     if (!line_buf_.empty()) {
114       line_buf_.append(base::as_string_view(buf));
115       buf = base::as_byte_span(line_buf_);
116     }
117 
118     if (reached_last_chunk_) {
119       if (!buf.empty()) {
120         DVLOG(1) << "ignoring http trailer";
121       } else {
122         reached_eof_ = true;
123       }
124     } else if (chunk_terminator_remaining_) {
125       if (!buf.empty()) {
126         DLOG(ERROR) << "chunk data not terminated properly";
127         return ERR_INVALID_CHUNKED_ENCODING;
128       }
129       chunk_terminator_remaining_ = false;
130     } else if (!buf.empty()) {
131       // Ignore any chunk-extensions.
132       size_t index_of_semicolon = base::as_string_view(buf).find(';');
133       if (index_of_semicolon != std::string_view::npos) {
134         buf = buf.first(index_of_semicolon);
135       }
136 
137       if (!ParseChunkSize(buf, &chunk_remaining_)) {
138         DLOG(ERROR) << "Failed parsing HEX from: " << base::as_string_view(buf);
139         return ERR_INVALID_CHUNKED_ENCODING;
140       }
141 
142       if (chunk_remaining_ == 0)
143         reached_last_chunk_ = true;
144     } else {
145       DLOG(ERROR) << "missing chunk-size";
146       return ERR_INVALID_CHUNKED_ENCODING;
147     }
148     line_buf_.clear();
149   } else {
150     // Save the partial line; wait for more data.
151     bytes_consumed = buf.size();
152 
153     // Ignore a trailing CR
154     if (buf.back() == '\r') {
155       buf = buf.first(buf.size() - 1);
156     }
157 
158     if (line_buf_.length() + buf.size() > kMaxLineBufLen) {
159       DLOG(ERROR) << "Chunked line length too long";
160       return ERR_INVALID_CHUNKED_ENCODING;
161     }
162 
163     line_buf_.append(base::as_string_view(buf));
164   }
165   return bytes_consumed;
166 }
167 
168 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
169 // some sites rely on more lenient parsing.
170 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
171 // (0x20) to be 7 characters long, such as "819b   ".
172 //
173 // A comparison of browsers running on WindowsXP shows that
174 // they will parse the following inputs (egrep syntax):
175 //
176 // Let \X be the character class for a hex digit: [0-9a-fA-F]
177 //
178 //   RFC 7230: ^\X+$
179 //        IE7: ^\X+[^\X]*$
180 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
181 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
182 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
183 //
184 // Our strategy is to be as strict as possible, while not breaking
185 // known sites.
186 //
187 //         Us: ^\X+[ ]*$
ParseChunkSize(base::span<const uint8_t> buf,uint64_t * out)188 bool HttpChunkedDecoder::ParseChunkSize(base::span<const uint8_t> buf,
189                                         uint64_t* out) {
190   // Strip trailing spaces
191   while (!buf.empty() && buf.back() == ' ') {
192     buf = buf.first(buf.size() - 1u);
193   }
194 
195   // Be more restrictive than HexStringToInt64;
196   // don't allow inputs with leading "-", "+", "0x", "0X"
197   std::string_view chunk_size = base::as_string_view(buf);
198   if (!base::ranges::all_of(chunk_size, base::IsHexDigit<char>)) {
199     return false;
200   }
201 
202   int64_t parsed_number;
203   bool ok = base::HexStringToInt64(chunk_size, &parsed_number);
204   if (ok && parsed_number >= 0) {
205     *out = parsed_number;
206     return true;
207   }
208   return false;
209 }
210 
211 }  // namespace net
212