1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8
9 #include <stdint.h>
10
11 #include <sstream>
12 #include <utility>
13
14 #include "constants/stream_dict_common.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_encryptor.h"
17 #include "core/fpdfapi/parser/cpdf_flateencoder.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
22 #include "core/fxcrt/cfx_memorystream.h"
23 #include "core/fxcrt/data_vector.h"
24 #include "core/fxcrt/fx_stream.h"
25 #include "core/fxcrt/span_util.h"
26 #include "third_party/base/containers/contains.h"
27 #include "third_party/base/numerics/safe_conversions.h"
28
29 namespace {
30
IsMetaDataStreamDictionary(const CPDF_Dictionary * dict)31 bool IsMetaDataStreamDictionary(const CPDF_Dictionary* dict) {
32 // See ISO 32000-1:2008 spec, table 315.
33 return ValidateDictType(dict, "Metadata") &&
34 dict->GetNameFor("Subtype") == "XML";
35 }
36
37 } // namespace
38
// Compiler-generated default constructor: no dictionary or data is supplied
// here, so members start in whatever state their declarations give them.
CPDF_Stream::CPDF_Stream() = default;
40
CPDF_Stream(RetainPtr<CPDF_Dictionary> pDict)41 CPDF_Stream::CPDF_Stream(RetainPtr<CPDF_Dictionary> pDict)
42 : CPDF_Stream(DataVector<uint8_t>(), std::move(pDict)) {}
43
CPDF_Stream(DataVector<uint8_t> pData,RetainPtr<CPDF_Dictionary> pDict)44 CPDF_Stream::CPDF_Stream(DataVector<uint8_t> pData,
45 RetainPtr<CPDF_Dictionary> pDict)
46 : data_(std::move(pData)), dict_(std::move(pDict)) {
47 SetLengthInDict(pdfium::base::checked_cast<int>(
48 absl::get<DataVector<uint8_t>>(data_).size()));
49 }
50
~CPDF_Stream()51 CPDF_Stream::~CPDF_Stream() {
52 m_ObjNum = kInvalidObjNum;
53 if (dict_ && dict_->GetObjNum() == kInvalidObjNum)
54 dict_.Leak(); // lowercase release, release ownership.
55 }
56
// Identifies this object as a stream: always returns kStream.
CPDF_Object::Type CPDF_Stream::GetType() const {
  return kStream;
}
60
// Returns the raw, non-owning pointer held by `dict_`.
const CPDF_Dictionary* CPDF_Stream::GetDictInternal() const {
  return dict_.Get();
}
64
// Returns this object itself as a mutable CPDF_Stream pointer.
CPDF_Stream* CPDF_Stream::AsMutableStream() {
  return this;
}
68
InitStreamWithEmptyData(RetainPtr<CPDF_Dictionary> pDict)69 void CPDF_Stream::InitStreamWithEmptyData(RetainPtr<CPDF_Dictionary> pDict) {
70 dict_ = std::move(pDict);
71 TakeData({});
72 }
73
InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> pFile,RetainPtr<CPDF_Dictionary> pDict)74 void CPDF_Stream::InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> pFile,
75 RetainPtr<CPDF_Dictionary> pDict) {
76 data_ = pFile;
77 dict_ = std::move(pDict);
78 SetLengthInDict(pdfium::base::checked_cast<int>(pFile->GetSize()));
79 }
80
// Produces a copy of this stream by forwarding to CloneObjectNonCyclic()
// with `false` as its argument.
RetainPtr<CPDF_Object> CPDF_Stream::Clone() const {
  return CloneObjectNonCyclic(false);
}
84
CloneNonCyclic(bool bDirect,std::set<const CPDF_Object * > * pVisited) const85 RetainPtr<CPDF_Object> CPDF_Stream::CloneNonCyclic(
86 bool bDirect,
87 std::set<const CPDF_Object*>* pVisited) const {
88 pVisited->insert(this);
89 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
90 pAcc->LoadAllDataRaw();
91
92 RetainPtr<const CPDF_Dictionary> pDict = GetDict();
93 RetainPtr<CPDF_Dictionary> pNewDict;
94 if (pDict && !pdfium::Contains(*pVisited, pDict.Get())) {
95 pNewDict = ToDictionary(static_cast<const CPDF_Object*>(pDict.Get())
96 ->CloneNonCyclic(bDirect, pVisited));
97 }
98 return pdfium::MakeRetain<CPDF_Stream>(pAcc->DetachData(),
99 std::move(pNewDict));
100 }
101
// Replaces the stream contents with a copy of `pData` (via SetData()) and
// then removes the "Filter" and kDecodeParms entries from the dictionary.
void CPDF_Stream::SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData) {
  // SetData() must run first: it reaches SetLengthInDict(), which creates
  // `dict_` when it is null, so the dereferences below are safe.
  SetData(pData);
  dict_->RemoveFor("Filter");
  dict_->RemoveFor(pdfium::stream::kDecodeParms);
}
107
SetDataFromStringstreamAndRemoveFilter(fxcrt::ostringstream * stream)108 void CPDF_Stream::SetDataFromStringstreamAndRemoveFilter(
109 fxcrt::ostringstream* stream) {
110 if (stream->tellp() <= 0) {
111 SetDataAndRemoveFilter({});
112 return;
113 }
114
115 SetDataAndRemoveFilter(
116 {reinterpret_cast<const uint8_t*>(stream->str().c_str()),
117 static_cast<size_t>(stream->tellp())});
118 }
119
SetData(pdfium::span<const uint8_t> pData)120 void CPDF_Stream::SetData(pdfium::span<const uint8_t> pData) {
121 DataVector<uint8_t> data_copy(pData.begin(), pData.end());
122 TakeData(std::move(data_copy));
123 }
124
TakeData(DataVector<uint8_t> data)125 void CPDF_Stream::TakeData(DataVector<uint8_t> data) {
126 const size_t size = data.size();
127 data_ = std::move(data);
128 SetLengthInDict(pdfium::base::checked_cast<int>(size));
129 }
130
SetDataFromStringstream(fxcrt::ostringstream * stream)131 void CPDF_Stream::SetDataFromStringstream(fxcrt::ostringstream* stream) {
132 if (stream->tellp() <= 0) {
133 SetData({});
134 return;
135 }
136 SetData({reinterpret_cast<const uint8_t*>(stream->str().c_str()),
137 static_cast<size_t>(stream->tellp())});
138 }
139
ReadAllRawData() const140 DataVector<uint8_t> CPDF_Stream::ReadAllRawData() const {
141 CHECK(IsFileBased());
142
143 DataVector<uint8_t> result(GetRawSize());
144 DCHECK(!result.empty());
145
146 auto underlying_stream = absl::get<RetainPtr<IFX_SeekableReadStream>>(data_);
147 if (!underlying_stream->ReadBlockAtOffset(result, 0))
148 return DataVector<uint8_t>();
149
150 return result;
151 }
152
HasFilter() const153 bool CPDF_Stream::HasFilter() const {
154 return dict_ && dict_->KeyExist("Filter");
155 }
156
GetUnicodeText() const157 WideString CPDF_Stream::GetUnicodeText() const {
158 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
159 pAcc->LoadAllDataFiltered();
160 return PDF_DecodeText(pAcc->GetSpan());
161 }
162
WriteTo(IFX_ArchiveStream * archive,const CPDF_Encryptor * encryptor) const163 bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive,
164 const CPDF_Encryptor* encryptor) const {
165 const bool is_metadata = IsMetaDataStreamDictionary(GetDict().Get());
166 CPDF_FlateEncoder encoder(pdfium::WrapRetain(this), !is_metadata);
167
168 DataVector<uint8_t> encrypted_data;
169 pdfium::span<const uint8_t> data = encoder.GetSpan();
170 if (encryptor && !is_metadata) {
171 encrypted_data = encryptor->Encrypt(data);
172 data = encrypted_data;
173 }
174
175 encoder.UpdateLength(data.size());
176 if (!encoder.WriteDictTo(archive, encryptor))
177 return false;
178
179 if (!archive->WriteString("stream\r\n"))
180 return false;
181
182 if (!archive->WriteBlock(data))
183 return false;
184
185 return archive->WriteString("\r\nendstream");
186 }
187
GetRawSize() const188 size_t CPDF_Stream::GetRawSize() const {
189 if (IsFileBased()) {
190 return pdfium::base::checked_cast<size_t>(
191 absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize());
192 }
193 if (IsMemoryBased())
194 return absl::get<DataVector<uint8_t>>(data_).size();
195 DCHECK(IsUninitialized());
196 return 0;
197 }
198
GetInMemoryRawData() const199 pdfium::span<const uint8_t> CPDF_Stream::GetInMemoryRawData() const {
200 DCHECK(IsMemoryBased());
201 return absl::get<DataVector<uint8_t>>(data_);
202 }
203
SetLengthInDict(int length)204 void CPDF_Stream::SetLengthInDict(int length) {
205 if (!dict_)
206 dict_ = pdfium::MakeRetain<CPDF_Dictionary>();
207 dict_->SetNewFor<CPDF_Number>("Length", length);
208 }
209