1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
6
7 #include <stdint.h>
8
9 #include <map>
10 #include <numeric>
11 #include <sstream>
12 #include <utility>
13 #include <vector>
14
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/page/cpdf_pageobjectholder.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/object_tree_traversal_util.h"
23 #include "core/fxcrt/check.h"
24 #include "core/fxcrt/containers/adapters.h"
25 #include "core/fxcrt/containers/contains.h"
26 #include "core/fxcrt/numerics/safe_conversions.h"
27 #include "third_party/abseil-cpp/absl/types/variant.h"
28
CPDF_PageContentManager(CPDF_PageObjectHolder * page_obj_holder,CPDF_Document * document)29 CPDF_PageContentManager::CPDF_PageContentManager(
30 CPDF_PageObjectHolder* page_obj_holder,
31 CPDF_Document* document)
32 : page_obj_holder_(page_obj_holder),
33 document_(document),
34 objects_with_multi_refs_(GetObjectsWithMultipleReferences(document_)) {
35 RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
36 RetainPtr<CPDF_Object> contents_obj =
37 page_dict->GetMutableObjectFor("Contents");
38 RetainPtr<CPDF_Array> contents_array = ToArray(contents_obj);
39 if (contents_array) {
40 CHECK(contents_array->IsInline());
41 contents_ = std::move(contents_array);
42 return;
43 }
44
45 RetainPtr<CPDF_Reference> contents_reference = ToReference(contents_obj);
46 if (contents_reference) {
47 RetainPtr<CPDF_Object> indirect_obj =
48 contents_reference->GetMutableDirect();
49 if (!indirect_obj)
50 return;
51
52 contents_array.Reset(indirect_obj->AsMutableArray());
53 if (contents_array) {
54 if (pdfium::Contains(objects_with_multi_refs_,
55 contents_array->GetObjNum())) {
56 RetainPtr<CPDF_Array> cloned_contents_array =
57 pdfium::WrapRetain(contents_array->Clone()->AsMutableArray());
58 page_dict->SetFor("Contents", cloned_contents_array);
59 contents_ = std::move(cloned_contents_array);
60 } else {
61 contents_ = std::move(contents_array);
62 }
63 } else if (indirect_obj->IsStream()) {
64 contents_ = pdfium::WrapRetain(indirect_obj->AsMutableStream());
65 }
66 }
67 }
68
~CPDF_PageContentManager()69 CPDF_PageContentManager::~CPDF_PageContentManager() {
70 ExecuteScheduledRemovals();
71 }
72
HasStreamAtIndex(size_t stream_index)73 bool CPDF_PageContentManager::HasStreamAtIndex(size_t stream_index) {
74 return !!GetStreamByIndex(stream_index);
75 }
76
GetStreamByIndex(size_t stream_index)77 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetStreamByIndex(
78 size_t stream_index) {
79 RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
80 if (contents_stream) {
81 return stream_index == 0 ? contents_stream : nullptr;
82 }
83
84 RetainPtr<CPDF_Array> contents_array = GetContentsArray();
85 if (!contents_array) {
86 return nullptr;
87 }
88
89 RetainPtr<CPDF_Reference> stream_reference =
90 ToReference(contents_array->GetMutableObjectAt(stream_index));
91 if (!stream_reference)
92 return nullptr;
93
94 return ToStream(stream_reference->GetMutableDirect());
95 }
96
AddStream(fxcrt::ostringstream * buf)97 size_t CPDF_PageContentManager::AddStream(fxcrt::ostringstream* buf) {
98 auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
99
100 // If there is one Content stream (not in an array), now there will be two, so
101 // create an array with the old and the new one. The new one's index is 1.
102 RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
103 if (contents_stream) {
104 auto new_contents_array = document_->NewIndirect<CPDF_Array>();
105 new_contents_array->AppendNew<CPDF_Reference>(document_,
106 contents_stream->GetObjNum());
107 new_contents_array->AppendNew<CPDF_Reference>(document_,
108 new_stream->GetObjNum());
109
110 RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
111 page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
112 new_contents_array->GetObjNum());
113 contents_ = std::move(new_contents_array);
114 return 1;
115 }
116
117 // If there is an array, just add the new stream to it, at the last position.
118 RetainPtr<CPDF_Array> contents_array = GetContentsArray();
119 if (contents_array) {
120 contents_array->AppendNew<CPDF_Reference>(document_,
121 new_stream->GetObjNum());
122 return contents_array->size() - 1;
123 }
124
125 // There were no Contents, so add the new stream as the single Content stream.
126 // Its index is 0.
127 RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
128 page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
129 new_stream->GetObjNum());
130 contents_ = std::move(new_stream);
131 return 0;
132 }
133
UpdateStream(size_t stream_index,fxcrt::ostringstream * buf)134 void CPDF_PageContentManager::UpdateStream(size_t stream_index,
135 fxcrt::ostringstream* buf) {
136 // If `buf` is now empty, remove the stream instead of setting the data.
137 if (buf->tellp() <= 0) {
138 ScheduleRemoveStreamByIndex(stream_index);
139 return;
140 }
141
142 RetainPtr<CPDF_Stream> existing_stream = GetStreamByIndex(stream_index);
143 CHECK(existing_stream);
144 if (!pdfium::Contains(objects_with_multi_refs_,
145 existing_stream->GetObjNum())) {
146 existing_stream->SetDataFromStringstreamAndRemoveFilter(buf);
147 return;
148 }
149
150 if (GetContentsStream()) {
151 auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
152 RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
153 page_dict->SetNewFor<CPDF_Reference>("Contents", document_,
154 new_stream->GetObjNum());
155 }
156
157 RetainPtr<CPDF_Array> contents_array = GetContentsArray();
158 if (!contents_array) {
159 return;
160 }
161
162 RetainPtr<CPDF_Reference> stream_reference =
163 ToReference(contents_array->GetMutableObjectAt(stream_index));
164 if (!stream_reference) {
165 return;
166 }
167
168 auto new_stream = document_->NewIndirect<CPDF_Stream>(buf);
169 stream_reference->SetRef(document_, new_stream->GetObjNum());
170 }
171
ScheduleRemoveStreamByIndex(size_t stream_index)172 void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) {
173 streams_to_remove_.insert(stream_index);
174 }
175
ExecuteScheduledRemovals()176 void CPDF_PageContentManager::ExecuteScheduledRemovals() {
177 // This method assumes there are no dirty streams in the
178 // CPDF_PageObjectHolder. If there were any, their indexes would need to be
179 // updated.
180 // Since CPDF_PageContentManager is only instantiated in
181 // CPDF_PageContentGenerator::GenerateContent(), which cleans up the dirty
182 // streams first, this should always be true.
183 // This method does not bother to inspect IsActive() for page objects; it will
184 // remove any object that has been scheduled for removal, regardless of active
185 // status.
186 DCHECK(!page_obj_holder_->HasDirtyStreams());
187
188 if (streams_to_remove_.empty()) {
189 return;
190 }
191
192 RetainPtr<CPDF_Stream> contents_stream = GetContentsStream();
193 if (contents_stream) {
194 // Only stream that can be removed is 0.
195 if (streams_to_remove_.find(0) != streams_to_remove_.end()) {
196 RetainPtr<CPDF_Dictionary> page_dict = page_obj_holder_->GetMutableDict();
197 page_dict->RemoveFor("Contents");
198 }
199 return;
200 }
201
202 RetainPtr<CPDF_Array> contents_array = GetContentsArray();
203 if (!contents_array) {
204 return;
205 }
206
207 // Initialize a vector with the old stream indexes. This will be used to build
208 // a map from the old to the new indexes.
209 std::vector<size_t> streams_left(contents_array->size());
210 std::iota(streams_left.begin(), streams_left.end(), 0);
211
212 // In reverse order so as to not change the indexes in the middle of the loop,
213 // remove the streams.
214 for (size_t stream_index : pdfium::Reversed(streams_to_remove_)) {
215 contents_array->RemoveAt(stream_index);
216 streams_left.erase(streams_left.begin() + stream_index);
217 }
218
219 // Create a mapping from the old to the new stream indexes, shifted due to the
220 // deletion of the |streams_to_remove_|.
221 std::map<size_t, size_t> stream_index_mapping;
222 for (size_t i = 0; i < streams_left.size(); ++i) {
223 stream_index_mapping[streams_left[i]] = i;
224 }
225
226 // Update the page objects' content stream indexes.
227 for (const auto& obj : *page_obj_holder_) {
228 int32_t old_stream_index = obj->GetContentStream();
229 int32_t new_stream_index =
230 pdfium::checked_cast<int32_t>(stream_index_mapping[old_stream_index]);
231 obj->SetContentStream(new_stream_index);
232 }
233
234 // Even if there is a single content stream now, keep the array with a single
235 // element. It's valid, a second stream might be added in the near future, and
236 // the complexity of removing it is not worth it.
237 }
238
GetContentsStream()239 RetainPtr<CPDF_Stream> CPDF_PageContentManager::GetContentsStream() {
240 if (absl::holds_alternative<RetainPtr<CPDF_Stream>>(contents_)) {
241 return absl::get<RetainPtr<CPDF_Stream>>(contents_);
242 }
243 return nullptr;
244 }
245
GetContentsArray()246 RetainPtr<CPDF_Array> CPDF_PageContentManager::GetContentsArray() {
247 if (absl::holds_alternative<RetainPtr<CPDF_Array>>(contents_)) {
248 return absl::get<RetainPtr<CPDF_Array>>(contents_);
249 }
250 return nullptr;
251 }
252