// Copyright 2011 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Derived from: // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp // The license block is: /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla. * * The Initial Developer of the Original Code is * Netscape Communications. * Portions created by the Initial Developer are Copyright (C) 2001 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Darin Fisher (original author) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include "net/http/http_chunked_decoder.h" #include #include #include "base/logging.h" #include "base/numerics/safe_conversions.h" #include "base/ranges/algorithm.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "net/base/net_errors.h" namespace net { // Absurdly long size to avoid imposing a constraint on chunked encoding // extensions. const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; HttpChunkedDecoder::HttpChunkedDecoder() = default; int HttpChunkedDecoder::FilterBuf(base::span buf) { size_t result = 0; while (buf.size() > 0) { if (chunk_remaining_ > 0) { size_t num = std::min(base::saturated_cast(chunk_remaining_), buf.size()); chunk_remaining_ -= num; result += num; buf = buf.subspan(num); // After each chunk's data there should be a CRLF. if (chunk_remaining_ == 0) chunk_terminator_remaining_ = true; continue; } else if (reached_eof_) { bytes_after_eof_ += buf.size(); break; // Done! } int bytes_consumed = ScanForChunkRemaining(buf); if (bytes_consumed < 0) return bytes_consumed; // Error base::span subspan = buf.subspan(base::checked_cast(bytes_consumed)); if (!subspan.empty()) { buf.copy_prefix_from(subspan); } buf = buf.first(subspan.size()); } // TODO(Kelsen): the return type should become size_t. return base::checked_cast(result); } int HttpChunkedDecoder::ScanForChunkRemaining(base::span buf) { int bytes_consumed = 0; size_t index_of_lf = base::as_string_view(buf).find('\n'); if (index_of_lf != std::string_view::npos) { buf = buf.first(index_of_lf); // Eliminate a preceding CR. if (!buf.empty() && buf.back() == '\r') { buf = buf.first(buf.size() - 1u); } bytes_consumed = static_cast(index_of_lf) + 1; // Make buf point to the full line buffer to parse. if (!line_buf_.empty()) { line_buf_.append(base::as_string_view(buf)); buf = base::as_byte_span(line_buf_); } if (reached_last_chunk_) { if (!buf.empty()) { DVLOG(1) << "ignoring http trailer"; } else { reached_eof_ = true; } } else if (chunk_terminator_remaining_) { if (!buf.empty()) { DLOG(ERROR) << "chunk data not terminated properly"; return ERR_INVALID_CHUNKED_ENCODING; } chunk_terminator_remaining_ = false; } else if (!buf.empty()) { // Ignore any chunk-extensions. size_t index_of_semicolon = base::as_string_view(buf).find(';'); if (index_of_semicolon != std::string_view::npos) { buf = buf.first(index_of_semicolon); } if (!ParseChunkSize(buf, &chunk_remaining_)) { DLOG(ERROR) << "Failed parsing HEX from: " << base::as_string_view(buf); return ERR_INVALID_CHUNKED_ENCODING; } if (chunk_remaining_ == 0) reached_last_chunk_ = true; } else { DLOG(ERROR) << "missing chunk-size"; return ERR_INVALID_CHUNKED_ENCODING; } line_buf_.clear(); } else { // Save the partial line; wait for more data. bytes_consumed = buf.size(); // Ignore a trailing CR if (buf.back() == '\r') { buf = buf.first(buf.size() - 1); } if (line_buf_.length() + buf.size() > kMaxLineBufLen) { DLOG(ERROR) << "Chunked line length too long"; return ERR_INVALID_CHUNKED_ENCODING; } line_buf_.append(base::as_string_view(buf)); } return bytes_consumed; } // While the HTTP 1.1 specification defines chunk-size as 1*HEX // some sites rely on more lenient parsing. // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces // (0x20) to be 7 characters long, such as "819b ". // // A comparison of browsers running on WindowsXP shows that // they will parse the following inputs (egrep syntax): // // Let \X be the character class for a hex digit: [0-9a-fA-F] // // RFC 7230: ^\X+$ // IE7: ^\X+[^\X]*$ // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ // // Our strategy is to be as strict as possible, while not breaking // known sites. // // Us: ^\X+[ ]*$ bool HttpChunkedDecoder::ParseChunkSize(base::span buf, uint64_t* out) { // Strip trailing spaces while (!buf.empty() && buf.back() == ' ') { buf = buf.first(buf.size() - 1u); } // Be more restrictive than HexStringToInt64; // don't allow inputs with leading "-", "+", "0x", "0X" std::string_view chunk_size = base::as_string_view(buf); if (!base::ranges::all_of(chunk_size, base::IsHexDigit)) { return false; } int64_t parsed_number; bool ok = base::HexStringToInt64(chunk_size, &parsed_number); if (ok && parsed_number >= 0) { *out = parsed_number; return true; } return false; } } // namespace net