1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8
9 #include <stdint.h>
10
11 #include <sstream>
12 #include <utility>
13
14 #include "constants/stream_dict_common.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_encryptor.h"
17 #include "core/fpdfapi/parser/cpdf_flateencoder.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
22 #include "core/fxcrt/cfx_memorystream.h"
23 #include "core/fxcrt/check.h"
24 #include "core/fxcrt/containers/contains.h"
25 #include "core/fxcrt/data_vector.h"
26 #include "core/fxcrt/fx_stream.h"
27 #include "core/fxcrt/numerics/safe_conversions.h"
28 #include "core/fxcrt/span_util.h"
29
30 namespace {
31
IsMetaDataStreamDictionary(const CPDF_Dictionary * dict)32 bool IsMetaDataStreamDictionary(const CPDF_Dictionary* dict) {
33 // See ISO 32000-1:2008 spec, table 315.
34 return ValidateDictType(dict, "Metadata") &&
35 dict->GetNameFor("Subtype") == "XML";
36 }
37
38 } // namespace
39
CPDF_Stream(RetainPtr<CPDF_Dictionary> dict)40 CPDF_Stream::CPDF_Stream(RetainPtr<CPDF_Dictionary> dict)
41 : CPDF_Stream(DataVector<uint8_t>(), std::move(dict)) {}
42
CPDF_Stream(pdfium::span<const uint8_t> span)43 CPDF_Stream::CPDF_Stream(pdfium::span<const uint8_t> span)
44 : dict_(pdfium::MakeRetain<CPDF_Dictionary>()) {
45 SetData(span);
46 }
47
CPDF_Stream(fxcrt::ostringstream * stream)48 CPDF_Stream::CPDF_Stream(fxcrt::ostringstream* stream)
49 : dict_(pdfium::MakeRetain<CPDF_Dictionary>()) {
50 SetDataFromStringstream(stream);
51 }
52
CPDF_Stream(RetainPtr<IFX_SeekableReadStream> file,RetainPtr<CPDF_Dictionary> dict)53 CPDF_Stream::CPDF_Stream(RetainPtr<IFX_SeekableReadStream> file,
54 RetainPtr<CPDF_Dictionary> dict)
55 : data_(std::move(file)), dict_(std::move(dict)) {
56 CHECK(dict_->IsInline());
57 SetLengthInDict(pdfium::checked_cast<int>(
58 absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize()));
59 }
60
CPDF_Stream(DataVector<uint8_t> data,RetainPtr<CPDF_Dictionary> dict)61 CPDF_Stream::CPDF_Stream(DataVector<uint8_t> data,
62 RetainPtr<CPDF_Dictionary> dict)
63 : data_(std::move(data)), dict_(std::move(dict)) {
64 CHECK(dict_->IsInline());
65 SetLengthInDict(
66 pdfium::checked_cast<int>(absl::get<DataVector<uint8_t>>(data_).size()));
67 }
68
~CPDF_Stream()69 CPDF_Stream::~CPDF_Stream() {
70 m_ObjNum = kInvalidObjNum;
71 if (dict_->GetObjNum() == kInvalidObjNum) {
72 dict_.Leak(); // lowercase release, release ownership.
73 }
74 }
75
GetType() const76 CPDF_Object::Type CPDF_Stream::GetType() const {
77 return kStream;
78 }
79
GetDictInternal() const80 const CPDF_Dictionary* CPDF_Stream::GetDictInternal() const {
81 return dict_.Get();
82 }
83
AsMutableStream()84 CPDF_Stream* CPDF_Stream::AsMutableStream() {
85 return this;
86 }
87
InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> file)88 void CPDF_Stream::InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> file) {
89 const int size = pdfium::checked_cast<int>(file->GetSize());
90 data_ = std::move(file);
91 dict_ = pdfium::MakeRetain<CPDF_Dictionary>();
92 SetLengthInDict(size);
93 }
94
Clone() const95 RetainPtr<CPDF_Object> CPDF_Stream::Clone() const {
96 return CloneObjectNonCyclic(false);
97 }
98
CloneNonCyclic(bool bDirect,std::set<const CPDF_Object * > * pVisited) const99 RetainPtr<CPDF_Object> CPDF_Stream::CloneNonCyclic(
100 bool bDirect,
101 std::set<const CPDF_Object*>* pVisited) const {
102 pVisited->insert(this);
103 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
104 pAcc->LoadAllDataRaw();
105
106 RetainPtr<const CPDF_Dictionary> pDict = GetDict();
107 RetainPtr<CPDF_Dictionary> pNewDict;
108 if (!pdfium::Contains(*pVisited, pDict.Get())) {
109 pNewDict = ToDictionary(static_cast<const CPDF_Object*>(pDict.Get())
110 ->CloneNonCyclic(bDirect, pVisited));
111 }
112 return pdfium::MakeRetain<CPDF_Stream>(pAcc->DetachData(),
113 std::move(pNewDict));
114 }
115
SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData)116 void CPDF_Stream::SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData) {
117 SetData(pData);
118 dict_->RemoveFor("Filter");
119 dict_->RemoveFor(pdfium::stream::kDecodeParms);
120 }
121
SetDataFromStringstreamAndRemoveFilter(fxcrt::ostringstream * stream)122 void CPDF_Stream::SetDataFromStringstreamAndRemoveFilter(
123 fxcrt::ostringstream* stream) {
124 if (stream->tellp() <= 0) {
125 SetDataAndRemoveFilter({});
126 return;
127 }
128 SetDataAndRemoveFilter(pdfium::as_byte_span(stream->str())
129 .first(static_cast<size_t>(stream->tellp())));
130 }
131
SetData(pdfium::span<const uint8_t> pData)132 void CPDF_Stream::SetData(pdfium::span<const uint8_t> pData) {
133 DataVector<uint8_t> data_copy(pData.begin(), pData.end());
134 TakeData(std::move(data_copy));
135 }
136
TakeData(DataVector<uint8_t> data)137 void CPDF_Stream::TakeData(DataVector<uint8_t> data) {
138 const int size = pdfium::checked_cast<int>(data.size());
139 data_ = std::move(data);
140 SetLengthInDict(size);
141 }
142
SetDataFromStringstream(fxcrt::ostringstream * stream)143 void CPDF_Stream::SetDataFromStringstream(fxcrt::ostringstream* stream) {
144 if (stream->tellp() <= 0) {
145 SetData({});
146 return;
147 }
148 SetData(pdfium::as_byte_span(stream->str())
149 .first(static_cast<size_t>(stream->tellp())));
150 }
151
ReadAllRawData() const152 DataVector<uint8_t> CPDF_Stream::ReadAllRawData() const {
153 CHECK(IsFileBased());
154
155 DataVector<uint8_t> result(GetRawSize());
156 DCHECK(!result.empty());
157
158 auto underlying_stream = absl::get<RetainPtr<IFX_SeekableReadStream>>(data_);
159 if (!underlying_stream->ReadBlockAtOffset(result, 0))
160 return DataVector<uint8_t>();
161
162 return result;
163 }
164
HasFilter() const165 bool CPDF_Stream::HasFilter() const {
166 return dict_->KeyExist("Filter");
167 }
168
GetUnicodeText() const169 WideString CPDF_Stream::GetUnicodeText() const {
170 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
171 pAcc->LoadAllDataFiltered();
172 return PDF_DecodeText(pAcc->GetSpan());
173 }
174
WriteTo(IFX_ArchiveStream * archive,const CPDF_Encryptor * encryptor) const175 bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive,
176 const CPDF_Encryptor* encryptor) const {
177 const bool is_metadata = IsMetaDataStreamDictionary(GetDict().Get());
178 CPDF_FlateEncoder encoder(pdfium::WrapRetain(this), !is_metadata);
179
180 DataVector<uint8_t> encrypted_data;
181 pdfium::span<const uint8_t> data = encoder.GetSpan();
182 if (encryptor && !is_metadata) {
183 encrypted_data = encryptor->Encrypt(data);
184 data = encrypted_data;
185 }
186
187 encoder.UpdateLength(data.size());
188 if (!encoder.WriteDictTo(archive, encryptor))
189 return false;
190
191 if (!archive->WriteString("stream\r\n"))
192 return false;
193
194 if (!archive->WriteBlock(data))
195 return false;
196
197 return archive->WriteString("\r\nendstream");
198 }
199
GetRawSize() const200 size_t CPDF_Stream::GetRawSize() const {
201 if (IsFileBased()) {
202 return pdfium::checked_cast<size_t>(
203 absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize());
204 }
205 return absl::get<DataVector<uint8_t>>(data_).size();
206 }
207
GetInMemoryRawData() const208 pdfium::span<const uint8_t> CPDF_Stream::GetInMemoryRawData() const {
209 DCHECK(IsMemoryBased());
210 return absl::get<DataVector<uint8_t>>(data_);
211 }
212
SetLengthInDict(int length)213 void CPDF_Stream::SetLengthInDict(int length) {
214 dict_->SetNewFor<CPDF_Number>("Length", length);
215 }
216