• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
6 
7 #include <stdint.h>
8 
9 #include <map>
10 #include <numeric>
11 #include <sstream>
12 #include <utility>
13 #include <vector>
14 
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/page/cpdf_pageobjectholder.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/object_tree_traversal_util.h"
23 #include "core/fxcrt/check.h"
24 #include "core/fxcrt/containers/adapters.h"
25 #include "core/fxcrt/containers/contains.h"
26 #include "core/fxcrt/numerics/safe_conversions.h"
27 #include "third_party/abseil-cpp/absl/types/variant.h"
28 
CPDF_PageContentManager(CPDF_PageObjectHolder * page_obj_holder,CPDF_Document * document)29 CPDF_PageContentManager::CPDF_PageContentManager(
30     CPDF_PageObjectHolder* page_obj_holder,
31     CPDF_Document* document)
32     : page_obj_holder_(page_obj_holder),
33       document_(document),
34       objects_with_multi_refs_(GetObjectsWithMultipleReferences(document_)) {
35   RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
36   RetainPtr<CPDF_Object> contents_obj =
37       page_dict->GetMutableObjectFor("Contents");
38   RetainPtr<CPDF_Array> contents_array = ToArray(contents_obj);
39   if (contents_array) {
40     CHECK(contents_array->IsInline());
41     contents_ = std::move(contents_array);
42     return;
43   }
44 
45   RetainPtr<CPDF_Reference> contents_reference = ToReference(contents_obj);
46   if (contents_reference) {
47     RetainPtr<CPDF_Object> indirect_obj =
48         contents_reference->GetMutableDirect();
49     if (!indirect_obj)
50       return;
51 
52     contents_array.Reset(indirect_obj->AsMutableArray());
53     if (contents_array) {
54       if (pdfium::Contains(objects_with_multi_refs_,
55                            contents_array->GetObjNum())) {
56         RetainPtr<CPDF_Array> cloned_contents_array =
57             pdfium::WrapRetain(contents_array->Clone()->AsMutableArray());
58         page_dict->SetFor("Contents", cloned_contents_array);
59         contents_ = std::move(cloned_contents_array);
60       } else {
61         contents_ = std::move(contents_array);
62       }
63     } else if (indirect_obj->IsStream()) {
64       contents_ = pdfium::WrapRetain(indirect_obj->AsMutableStream());
65     }
66   }
67 }
68 
~CPDF_PageContentManager()69 CPDF_PageContentManager::~CPDF_PageContentManager() {
70   ExecuteScheduledRemovals();
71 }
72 
HasStreamAtIndex(size_t stream_index)73 bool CPDF_PageContentManager::HasStreamAtIndex(size_t stream_index) {
74   return !!GetStreamByIndex(stream_index);
75 }
76 
GetStreamByIndex(size_t stream_index)77 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetStreamByIndex(
78     size_t stream_index) {
79   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
80   if (contents_stream) {
81     return stream_index == 0 ? contents_stream : nullptr;
82   }
83 
84   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
85   if (!contents_array) {
86     return nullptr;
87   }
88 
89   RetainPtr<CPDF_Reference> stream_reference =
90       ToReference(contents_array->GetMutableObjectAt(stream_index));
91   if (!stream_reference)
92     return nullptr;
93 
94   return ToStream(stream_reference->GetMutableDirect());
95 }
96 
AddStream(fxcrt::ostringstream * buf)97 size_t CPDF_PageContentManager::AddStream(fxcrt::ostringstream* buf) {
98   auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
99 
100   // If there is one Content stream (not in an array), now there will be two, so
101   // create an array with the old and the new one. The new one's index is 1.
102   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
103   if (contents_stream) {
104     auto new_contents_array = document_->NewIndirect<CPDF_Array>();
105     new_contents_array->AppendNew<CPDF_Reference>(document_,
106                                                   contents_stream->GetObjNum());
107     new_contents_array->AppendNew<CPDF_Reference>(document_,
108                                                   new_stream->GetObjNum());
109 
110     RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
111     page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
112                                          new_contents_array->GetObjNum());
113     contents_ = std::move(new_contents_array);
114     return 1;
115   }
116 
117   // If there is an array, just add the new stream to it, at the last position.
118   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
119   if (contents_array) {
120     contents_array->AppendNew<CPDF_Reference>(document_,
121                                               new_stream->GetObjNum());
122     return contents_array->size() - 1;
123   }
124 
125   // There were no Contents, so add the new stream as the single Content stream.
126   // Its index is 0.
127   RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
128   page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
129                                        new_stream->GetObjNum());
130   contents_ = std::move(new_stream);
131   return 0;
132 }
133 
UpdateStream(size_t stream_index,fxcrt::ostringstream * buf)134 void CPDF_PageContentManager::UpdateStream(size_t stream_index,
135                                            fxcrt::ostringstream* buf) {
136   // If `buf` is now empty, remove the stream instead of setting the data.
137   if (buf->tellp() <= 0) {
138     ScheduleRemoveStreamByIndex(stream_index);
139     return;
140   }
141 
142   RetainPtr<CPDF_Stream> existing_stream = GetStreamByIndex(stream_index);
143   CHECK(existing_stream);
144   if (!pdfium::Contains(objects_with_multi_refs_,
145                         existing_stream->GetObjNum())) {
146     existing_stream->SetDataFromStringstreamAndRemoveFilter(buf);
147     return;
148   }
149 
150   if (GetContentsStream()) {
151     auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
152     RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
153     page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
154                                          new_stream->GetObjNum());
155   }
156 
157   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
158   if (!contents_array) {
159     return;
160   }
161 
162   RetainPtr<CPDF_Reference> stream_reference =
163       ToReference(contents_array->GetMutableObjectAt(stream_index));
164   if (!stream_reference) {
165     return;
166   }
167 
168   auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
169   stream_reference->SetRef(document_, new_stream->GetObjNum());
170 }
171 
ScheduleRemoveStreamByIndex(size_t stream_index)172 void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) {
173   streams_to_remove_.insert(stream_index);
174 }
175 
ExecuteScheduledRemovals()176 void CPDF_PageContentManager::ExecuteScheduledRemovals() {
177   // This method assumes there are no dirty streams in the
178   // CPDF_PageObjectHolder. If there were any, their indexes would need to be
179   // updated.
180   // Since CPDF_PageContentManager is only instantiated in
181   // CPDF_PageContentGenerator::GenerateContent(), which cleans up the dirty
182   // streams first, this should always be true.
183   // This method does not bother to inspect IsActive() for page objects; it will
184   // remove any object that has been scheduled for removal, regardless of active
185   // status.
186   DCHECK(!page_obj_holder_->HasDirtyStreams());
187 
188   if (streams_to_remove_.empty()) {
189     return;
190   }
191 
192   RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
193   if (contents_stream) {
194     // Only stream that can be removed is 0.
195     if (streams_to_remove_.find(0) != streams_to_remove_.end()) {
196       RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
197       page_dict->RemoveFor("Contents");
198     }
199     return;
200   }
201 
202   RetainPtr<CPDF_Array> contents_array = GetContentsArray();
203   if (!contents_array) {
204     return;
205   }
206 
207   // Initialize a vector with the old stream indexes. This will be used to build
208   // a map from the old to the new indexes.
209   std::vector<size_t> streams_left(contents_array->size());
210   std::iota(streams_left.begin(), streams_left.end(), 0);
211 
212   // In reverse order so as to not change the indexes in the middle of the loop,
213   // remove the streams.
214   for (size_t stream_index : pdfium::Reversed(streams_to_remove_)) {
215     contents_array->RemoveAt(stream_index);
216     streams_left.erase(streams_left.begin() + stream_index);
217   }
218 
219   // Create a mapping from the old to the new stream indexes, shifted due to the
220   // deletion of the |streams_to_remove_|.
221   std::map<size_t, size_t> stream_index_mapping;
222   for (size_t i = 0; i < streams_left.size(); ++i) {
223     stream_index_mapping[streams_left[i]] = i;
224   }
225 
226   // Update the page objects' content stream indexes.
227   for (const auto& obj : *page_obj_holder_) {
228     int32_t old_stream_index = obj->GetContentStream();
229     int32_t new_stream_index =
230         pdfium::checked_cast<int32_t>(stream_index_mapping[old_stream_index]);
231     obj->SetContentStream(new_stream_index);
232   }
233 
234   // Even if there is a single content stream now, keep the array with a single
235   // element. It's valid, a second stream might be added in the near future, and
236   // the complexity of removing it is not worth it.
237 }
238 
GetContentsStream()239 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetContentsStream() {
240   if (absl::holds_alternative<RetainPtr<CPDF_Stream>>(contents_)) {
241     return absl::get<RetainPtr<CPDF_Stream>>(contents_);
242   }
243   return nullptr;
244 }
245 
GetContentsArray()246 RetainPtr<CPDF_Array> CPDF_PageContentManager::GetContentsArray() {
247   if (absl::holds_alternative<RetainPtr<CPDF_Array>>(contents_)) {
248     return absl::get<RetainPtr<CPDF_Array>>(contents_);
249   }
250   return nullptr;
251 }
252