// Copyright 2016 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fpdfapi/parser/cpdf_stream.h" #include #include #include #include "constants/stream_dict_common.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_encryptor.h" #include "core/fpdfapi/parser/cpdf_flateencoder.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fpdfapi/parser/fpdf_parser_utility.h" #include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/check.h" #include "core/fxcrt/containers/contains.h" #include "core/fxcrt/data_vector.h" #include "core/fxcrt/fx_stream.h" #include "core/fxcrt/numerics/safe_conversions.h" #include "core/fxcrt/span_util.h" namespace { bool IsMetaDataStreamDictionary(const CPDF_Dictionary* dict) { // See ISO 32000-1:2008 spec, table 315. return ValidateDictType(dict, "Metadata") && dict->GetNameFor("Subtype") == "XML"; } } // namespace CPDF_Stream::CPDF_Stream(RetainPtr dict) : CPDF_Stream(DataVector(), std::move(dict)) {} CPDF_Stream::CPDF_Stream(pdfium::span span) : dict_(pdfium::MakeRetain()) { SetData(span); } CPDF_Stream::CPDF_Stream(fxcrt::ostringstream* stream) : dict_(pdfium::MakeRetain()) { SetDataFromStringstream(stream); } CPDF_Stream::CPDF_Stream(RetainPtr file, RetainPtr dict) : data_(std::move(file)), dict_(std::move(dict)) { CHECK(dict_->IsInline()); SetLengthInDict(pdfium::checked_cast( absl::get>(data_)->GetSize())); } CPDF_Stream::CPDF_Stream(DataVector data, RetainPtr dict) : data_(std::move(data)), dict_(std::move(dict)) { CHECK(dict_->IsInline()); SetLengthInDict( pdfium::checked_cast(absl::get>(data_).size())); } CPDF_Stream::~CPDF_Stream() { m_ObjNum = kInvalidObjNum; if (dict_->GetObjNum() == kInvalidObjNum) { dict_.Leak(); // lowercase release, release ownership. } } CPDF_Object::Type CPDF_Stream::GetType() const { return kStream; } const CPDF_Dictionary* CPDF_Stream::GetDictInternal() const { return dict_.Get(); } CPDF_Stream* CPDF_Stream::AsMutableStream() { return this; } void CPDF_Stream::InitStreamFromFile(RetainPtr file) { const int size = pdfium::checked_cast(file->GetSize()); data_ = std::move(file); dict_ = pdfium::MakeRetain(); SetLengthInDict(size); } RetainPtr CPDF_Stream::Clone() const { return CloneObjectNonCyclic(false); } RetainPtr CPDF_Stream::CloneNonCyclic( bool bDirect, std::set* pVisited) const { pVisited->insert(this); auto pAcc = pdfium::MakeRetain(pdfium::WrapRetain(this)); pAcc->LoadAllDataRaw(); RetainPtr pDict = GetDict(); RetainPtr pNewDict; if (!pdfium::Contains(*pVisited, pDict.Get())) { pNewDict = ToDictionary(static_cast(pDict.Get()) ->CloneNonCyclic(bDirect, pVisited)); } return pdfium::MakeRetain(pAcc->DetachData(), std::move(pNewDict)); } void CPDF_Stream::SetDataAndRemoveFilter(pdfium::span pData) { SetData(pData); dict_->RemoveFor("Filter"); dict_->RemoveFor(pdfium::stream::kDecodeParms); } void CPDF_Stream::SetDataFromStringstreamAndRemoveFilter( fxcrt::ostringstream* stream) { if (stream->tellp() <= 0) { SetDataAndRemoveFilter({}); return; } SetDataAndRemoveFilter(pdfium::as_byte_span(stream->str()) .first(static_cast(stream->tellp()))); } void CPDF_Stream::SetData(pdfium::span pData) { DataVector data_copy(pData.begin(), pData.end()); TakeData(std::move(data_copy)); } void CPDF_Stream::TakeData(DataVector data) { const int size = pdfium::checked_cast(data.size()); data_ = std::move(data); SetLengthInDict(size); } void CPDF_Stream::SetDataFromStringstream(fxcrt::ostringstream* stream) { if (stream->tellp() <= 0) { SetData({}); return; } SetData(pdfium::as_byte_span(stream->str()) .first(static_cast(stream->tellp()))); } DataVector CPDF_Stream::ReadAllRawData() const { CHECK(IsFileBased()); DataVector result(GetRawSize()); DCHECK(!result.empty()); auto underlying_stream = absl::get>(data_); if (!underlying_stream->ReadBlockAtOffset(result, 0)) return DataVector(); return result; } bool CPDF_Stream::HasFilter() const { return dict_->KeyExist("Filter"); } WideString CPDF_Stream::GetUnicodeText() const { auto pAcc = pdfium::MakeRetain(pdfium::WrapRetain(this)); pAcc->LoadAllDataFiltered(); return PDF_DecodeText(pAcc->GetSpan()); } bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive, const CPDF_Encryptor* encryptor) const { const bool is_metadata = IsMetaDataStreamDictionary(GetDict().Get()); CPDF_FlateEncoder encoder(pdfium::WrapRetain(this), !is_metadata); DataVector encrypted_data; pdfium::span data = encoder.GetSpan(); if (encryptor && !is_metadata) { encrypted_data = encryptor->Encrypt(data); data = encrypted_data; } encoder.UpdateLength(data.size()); if (!encoder.WriteDictTo(archive, encryptor)) return false; if (!archive->WriteString("stream\r\n")) return false; if (!archive->WriteBlock(data)) return false; return archive->WriteString("\r\nendstream"); } size_t CPDF_Stream::GetRawSize() const { if (IsFileBased()) { return pdfium::checked_cast( absl::get>(data_)->GetSize()); } return absl::get>(data_).size(); } pdfium::span CPDF_Stream::GetInMemoryRawData() const { DCHECK(IsMemoryBased()); return absl::get>(data_); } void CPDF_Stream::SetLengthInDict(int length) { dict_->SetNewFor("Length", length); }