• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8 
9 #include <stdint.h>
10 
11 #include <sstream>
12 #include <utility>
13 
14 #include "constants/stream_dict_common.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_encryptor.h"
17 #include "core/fpdfapi/parser/cpdf_flateencoder.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
22 #include "core/fxcrt/cfx_memorystream.h"
23 #include "core/fxcrt/check.h"
24 #include "core/fxcrt/containers/contains.h"
25 #include "core/fxcrt/data_vector.h"
26 #include "core/fxcrt/fx_stream.h"
27 #include "core/fxcrt/numerics/safe_conversions.h"
28 #include "core/fxcrt/span_util.h"
29 
30 namespace {
31 
IsMetaDataStreamDictionary(const CPDF_Dictionary * dict)32 bool IsMetaDataStreamDictionary(const CPDF_Dictionary* dict) {
33   // See ISO 32000-1:2008 spec, table 315.
34   return ValidateDictType(dict, "Metadata") &&
35          dict->GetNameFor("Subtype") == "XML";
36 }
37 
38 }  // namespace
39 
CPDF_Stream(RetainPtr<CPDF_Dictionary> dict)40 CPDF_Stream::CPDF_Stream(RetainPtr<CPDF_Dictionary> dict)
41     : CPDF_Stream(DataVector<uint8_t>(), std::move(dict)) {}
42 
CPDF_Stream(pdfium::span<const uint8_t> span)43 CPDF_Stream::CPDF_Stream(pdfium::span<const uint8_t> span)
44     : dict_(pdfium::MakeRetain<CPDF_Dictionary>()) {
45   SetData(span);
46 }
47 
CPDF_Stream(fxcrt::ostringstream * stream)48 CPDF_Stream::CPDF_Stream(fxcrt::ostringstream* stream)
49     : dict_(pdfium::MakeRetain<CPDF_Dictionary>()) {
50   SetDataFromStringstream(stream);
51 }
52 
CPDF_Stream(RetainPtr<IFX_SeekableReadStream> file,RetainPtr<CPDF_Dictionary> dict)53 CPDF_Stream::CPDF_Stream(RetainPtr<IFX_SeekableReadStream> file,
54                          RetainPtr<CPDF_Dictionary> dict)
55     : data_(std::move(file)), dict_(std::move(dict)) {
56   CHECK(dict_->IsInline());
57   SetLengthInDict(pdfium::checked_cast<int>(
58       absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize()));
59 }
60 
CPDF_Stream(DataVector<uint8_t> data,RetainPtr<CPDF_Dictionary> dict)61 CPDF_Stream::CPDF_Stream(DataVector<uint8_t> data,
62                          RetainPtr<CPDF_Dictionary> dict)
63     : data_(std::move(data)), dict_(std::move(dict)) {
64   CHECK(dict_->IsInline());
65   SetLengthInDict(
66       pdfium::checked_cast<int>(absl::get<DataVector<uint8_t>>(data_).size()));
67 }
68 
~CPDF_Stream()69 CPDF_Stream::~CPDF_Stream() {
70   m_ObjNum = kInvalidObjNum;
71   if (dict_->GetObjNum() == kInvalidObjNum) {
72     dict_.Leak();  // lowercase release, release ownership.
73   }
74 }
75 
GetType() const76 CPDF_Object::Type CPDF_Stream::GetType() const {
77   return kStream;
78 }
79 
GetDictInternal() const80 const CPDF_Dictionary* CPDF_Stream::GetDictInternal() const {
81   return dict_.Get();
82 }
83 
AsMutableStream()84 CPDF_Stream* CPDF_Stream::AsMutableStream() {
85   return this;
86 }
87 
InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> file)88 void CPDF_Stream::InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> file) {
89   const int size = pdfium::checked_cast<int>(file->GetSize());
90   data_ = std::move(file);
91   dict_ = pdfium::MakeRetain<CPDF_Dictionary>();
92   SetLengthInDict(size);
93 }
94 
Clone() const95 RetainPtr<CPDF_Object> CPDF_Stream::Clone() const {
96   return CloneObjectNonCyclic(false);
97 }
98 
CloneNonCyclic(bool bDirect,std::set<const CPDF_Object * > * pVisited) const99 RetainPtr<CPDF_Object> CPDF_Stream::CloneNonCyclic(
100     bool bDirect,
101     std::set<const CPDF_Object*>* pVisited) const {
102   pVisited->insert(this);
103   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
104   pAcc->LoadAllDataRaw();
105 
106   RetainPtr<const CPDF_Dictionary> pDict = GetDict();
107   RetainPtr<CPDF_Dictionary> pNewDict;
108   if (!pdfium::Contains(*pVisited, pDict.Get())) {
109     pNewDict = ToDictionary(static_cast<const CPDF_Object*>(pDict.Get())
110                                 ->CloneNonCyclic(bDirect, pVisited));
111   }
112   return pdfium::MakeRetain<CPDF_Stream>(pAcc->DetachData(),
113                                          std::move(pNewDict));
114 }
115 
SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData)116 void CPDF_Stream::SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData) {
117   SetData(pData);
118   dict_->RemoveFor("Filter");
119   dict_->RemoveFor(pdfium::stream::kDecodeParms);
120 }
121 
SetDataFromStringstreamAndRemoveFilter(fxcrt::ostringstream * stream)122 void CPDF_Stream::SetDataFromStringstreamAndRemoveFilter(
123     fxcrt::ostringstream* stream) {
124   if (stream->tellp() <= 0) {
125     SetDataAndRemoveFilter({});
126     return;
127   }
128   SetDataAndRemoveFilter(pdfium::as_byte_span(stream->str())
129                              .first(static_cast<size_t>(stream->tellp())));
130 }
131 
SetData(pdfium::span<const uint8_t> pData)132 void CPDF_Stream::SetData(pdfium::span<const uint8_t> pData) {
133   DataVector<uint8_t> data_copy(pData.begin(), pData.end());
134   TakeData(std::move(data_copy));
135 }
136 
TakeData(DataVector<uint8_t> data)137 void CPDF_Stream::TakeData(DataVector<uint8_t> data) {
138   const int size = pdfium::checked_cast<int>(data.size());
139   data_ = std::move(data);
140   SetLengthInDict(size);
141 }
142 
SetDataFromStringstream(fxcrt::ostringstream * stream)143 void CPDF_Stream::SetDataFromStringstream(fxcrt::ostringstream* stream) {
144   if (stream->tellp() <= 0) {
145     SetData({});
146     return;
147   }
148   SetData(pdfium::as_byte_span(stream->str())
149               .first(static_cast<size_t>(stream->tellp())));
150 }
151 
ReadAllRawData() const152 DataVector<uint8_t> CPDF_Stream::ReadAllRawData() const {
153   CHECK(IsFileBased());
154 
155   DataVector<uint8_t> result(GetRawSize());
156   DCHECK(!result.empty());
157 
158   auto underlying_stream = absl::get<RetainPtr<IFX_SeekableReadStream>>(data_);
159   if (!underlying_stream->ReadBlockAtOffset(result, 0))
160     return DataVector<uint8_t>();
161 
162   return result;
163 }
164 
HasFilter() const165 bool CPDF_Stream::HasFilter() const {
166   return dict_->KeyExist("Filter");
167 }
168 
GetUnicodeText() const169 WideString CPDF_Stream::GetUnicodeText() const {
170   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
171   pAcc->LoadAllDataFiltered();
172   return PDF_DecodeText(pAcc->GetSpan());
173 }
174 
WriteTo(IFX_ArchiveStream * archive,const CPDF_Encryptor * encryptor) const175 bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive,
176                           const CPDF_Encryptor* encryptor) const {
177   const bool is_metadata = IsMetaDataStreamDictionary(GetDict().Get());
178   CPDF_FlateEncoder encoder(pdfium::WrapRetain(this), !is_metadata);
179 
180   DataVector<uint8_t> encrypted_data;
181   pdfium::span<const uint8_t> data = encoder.GetSpan();
182   if (encryptor && !is_metadata) {
183     encrypted_data = encryptor->Encrypt(data);
184     data = encrypted_data;
185   }
186 
187   encoder.UpdateLength(data.size());
188   if (!encoder.WriteDictTo(archive, encryptor))
189     return false;
190 
191   if (!archive->WriteString("stream\r\n"))
192     return false;
193 
194   if (!archive->WriteBlock(data))
195     return false;
196 
197   return archive->WriteString("\r\nendstream");
198 }
199 
GetRawSize() const200 size_t CPDF_Stream::GetRawSize() const {
201   if (IsFileBased()) {
202     return pdfium::checked_cast<size_t>(
203         absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize());
204   }
205   return absl::get<DataVector<uint8_t>>(data_).size();
206 }
207 
GetInMemoryRawData() const208 pdfium::span<const uint8_t> CPDF_Stream::GetInMemoryRawData() const {
209   DCHECK(IsMemoryBased());
210   return absl::get<DataVector<uint8_t>>(data_);
211 }
212 
SetLengthInDict(int length)213 void CPDF_Stream::SetLengthInDict(int length) {
214   dict_->SetNewFor<CPDF_Number>("Length", length);
215 }
216