• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_stream.h"
8 
9 #include <stdint.h>
10 
11 #include <sstream>
12 #include <utility>
13 
14 #include "constants/stream_dict_common.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_encryptor.h"
17 #include "core/fpdfapi/parser/cpdf_flateencoder.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
22 #include "core/fxcrt/cfx_memorystream.h"
23 #include "core/fxcrt/data_vector.h"
24 #include "core/fxcrt/fx_stream.h"
25 #include "core/fxcrt/span_util.h"
26 #include "third_party/base/containers/contains.h"
27 #include "third_party/base/numerics/safe_conversions.h"
28 
29 namespace {
30 
IsMetaDataStreamDictionary(const CPDF_Dictionary * dict)31 bool IsMetaDataStreamDictionary(const CPDF_Dictionary* dict) {
32   // See ISO 32000-1:2008 spec, table 315.
33   return ValidateDictType(dict, "Metadata") &&
34          dict->GetNameFor("Subtype") == "XML";
35 }
36 
37 }  // namespace
38 
39 CPDF_Stream::CPDF_Stream() = default;
40 
CPDF_Stream(RetainPtr<CPDF_Dictionary> pDict)41 CPDF_Stream::CPDF_Stream(RetainPtr<CPDF_Dictionary> pDict)
42     : CPDF_Stream(DataVector<uint8_t>(), std::move(pDict)) {}
43 
CPDF_Stream(DataVector<uint8_t> pData,RetainPtr<CPDF_Dictionary> pDict)44 CPDF_Stream::CPDF_Stream(DataVector<uint8_t> pData,
45                          RetainPtr<CPDF_Dictionary> pDict)
46     : data_(std::move(pData)), dict_(std::move(pDict)) {
47   SetLengthInDict(pdfium::base::checked_cast<int>(
48       absl::get<DataVector<uint8_t>>(data_).size()));
49 }
50 
~CPDF_Stream()51 CPDF_Stream::~CPDF_Stream() {
52   m_ObjNum = kInvalidObjNum;
53   if (dict_ && dict_->GetObjNum() == kInvalidObjNum)
54     dict_.Leak();  // lowercase release, release ownership.
55 }
56 
GetType() const57 CPDF_Object::Type CPDF_Stream::GetType() const {
58   return kStream;
59 }
60 
GetDictInternal() const61 const CPDF_Dictionary* CPDF_Stream::GetDictInternal() const {
62   return dict_.Get();
63 }
64 
AsMutableStream()65 CPDF_Stream* CPDF_Stream::AsMutableStream() {
66   return this;
67 }
68 
InitStreamWithEmptyData(RetainPtr<CPDF_Dictionary> pDict)69 void CPDF_Stream::InitStreamWithEmptyData(RetainPtr<CPDF_Dictionary> pDict) {
70   dict_ = std::move(pDict);
71   TakeData({});
72 }
73 
InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> pFile,RetainPtr<CPDF_Dictionary> pDict)74 void CPDF_Stream::InitStreamFromFile(RetainPtr<IFX_SeekableReadStream> pFile,
75                                      RetainPtr<CPDF_Dictionary> pDict) {
76   data_ = pFile;
77   dict_ = std::move(pDict);
78   SetLengthInDict(pdfium::base::checked_cast<int>(pFile->GetSize()));
79 }
80 
Clone() const81 RetainPtr<CPDF_Object> CPDF_Stream::Clone() const {
82   return CloneObjectNonCyclic(false);
83 }
84 
CloneNonCyclic(bool bDirect,std::set<const CPDF_Object * > * pVisited) const85 RetainPtr<CPDF_Object> CPDF_Stream::CloneNonCyclic(
86     bool bDirect,
87     std::set<const CPDF_Object*>* pVisited) const {
88   pVisited->insert(this);
89   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
90   pAcc->LoadAllDataRaw();
91 
92   RetainPtr<const CPDF_Dictionary> pDict = GetDict();
93   RetainPtr<CPDF_Dictionary> pNewDict;
94   if (pDict && !pdfium::Contains(*pVisited, pDict.Get())) {
95     pNewDict = ToDictionary(static_cast<const CPDF_Object*>(pDict.Get())
96                                 ->CloneNonCyclic(bDirect, pVisited));
97   }
98   return pdfium::MakeRetain<CPDF_Stream>(pAcc->DetachData(),
99                                          std::move(pNewDict));
100 }
101 
SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData)102 void CPDF_Stream::SetDataAndRemoveFilter(pdfium::span<const uint8_t> pData) {
103   SetData(pData);
104   dict_->RemoveFor("Filter");
105   dict_->RemoveFor(pdfium::stream::kDecodeParms);
106 }
107 
SetDataFromStringstreamAndRemoveFilter(fxcrt::ostringstream * stream)108 void CPDF_Stream::SetDataFromStringstreamAndRemoveFilter(
109     fxcrt::ostringstream* stream) {
110   if (stream->tellp() <= 0) {
111     SetDataAndRemoveFilter({});
112     return;
113   }
114 
115   SetDataAndRemoveFilter(
116       {reinterpret_cast<const uint8_t*>(stream->str().c_str()),
117        static_cast<size_t>(stream->tellp())});
118 }
119 
SetData(pdfium::span<const uint8_t> pData)120 void CPDF_Stream::SetData(pdfium::span<const uint8_t> pData) {
121   DataVector<uint8_t> data_copy(pData.begin(), pData.end());
122   TakeData(std::move(data_copy));
123 }
124 
TakeData(DataVector<uint8_t> data)125 void CPDF_Stream::TakeData(DataVector<uint8_t> data) {
126   const size_t size = data.size();
127   data_ = std::move(data);
128   SetLengthInDict(pdfium::base::checked_cast<int>(size));
129 }
130 
SetDataFromStringstream(fxcrt::ostringstream * stream)131 void CPDF_Stream::SetDataFromStringstream(fxcrt::ostringstream* stream) {
132   if (stream->tellp() <= 0) {
133     SetData({});
134     return;
135   }
136   SetData({reinterpret_cast<const uint8_t*>(stream->str().c_str()),
137            static_cast<size_t>(stream->tellp())});
138 }
139 
ReadAllRawData() const140 DataVector<uint8_t> CPDF_Stream::ReadAllRawData() const {
141   CHECK(IsFileBased());
142 
143   DataVector<uint8_t> result(GetRawSize());
144   DCHECK(!result.empty());
145 
146   auto underlying_stream = absl::get<RetainPtr<IFX_SeekableReadStream>>(data_);
147   if (!underlying_stream->ReadBlockAtOffset(result, 0))
148     return DataVector<uint8_t>();
149 
150   return result;
151 }
152 
HasFilter() const153 bool CPDF_Stream::HasFilter() const {
154   return dict_ && dict_->KeyExist("Filter");
155 }
156 
GetUnicodeText() const157 WideString CPDF_Stream::GetUnicodeText() const {
158   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(this));
159   pAcc->LoadAllDataFiltered();
160   return PDF_DecodeText(pAcc->GetSpan());
161 }
162 
WriteTo(IFX_ArchiveStream * archive,const CPDF_Encryptor * encryptor) const163 bool CPDF_Stream::WriteTo(IFX_ArchiveStream* archive,
164                           const CPDF_Encryptor* encryptor) const {
165   const bool is_metadata = IsMetaDataStreamDictionary(GetDict().Get());
166   CPDF_FlateEncoder encoder(pdfium::WrapRetain(this), !is_metadata);
167 
168   DataVector<uint8_t> encrypted_data;
169   pdfium::span<const uint8_t> data = encoder.GetSpan();
170   if (encryptor && !is_metadata) {
171     encrypted_data = encryptor->Encrypt(data);
172     data = encrypted_data;
173   }
174 
175   encoder.UpdateLength(data.size());
176   if (!encoder.WriteDictTo(archive, encryptor))
177     return false;
178 
179   if (!archive->WriteString("stream\r\n"))
180     return false;
181 
182   if (!archive->WriteBlock(data))
183     return false;
184 
185   return archive->WriteString("\r\nendstream");
186 }
187 
GetRawSize() const188 size_t CPDF_Stream::GetRawSize() const {
189   if (IsFileBased()) {
190     return pdfium::base::checked_cast<size_t>(
191         absl::get<RetainPtr<IFX_SeekableReadStream>>(data_)->GetSize());
192   }
193   if (IsMemoryBased())
194     return absl::get<DataVector<uint8_t>>(data_).size();
195   DCHECK(IsUninitialized());
196   return 0;
197 }
198 
GetInMemoryRawData() const199 pdfium::span<const uint8_t> CPDF_Stream::GetInMemoryRawData() const {
200   DCHECK(IsMemoryBased());
201   return absl::get<DataVector<uint8_t>>(data_);
202 }
203 
SetLengthInDict(int length)204 void CPDF_Stream::SetLengthInDict(int length) {
205   if (!dict_)
206     dict_ = pdfium::MakeRetain<CPDF_Dictionary>();
207   dict_->SetNewFor<CPDF_Number>("Length", length);
208 }
209