1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_document.h"
8
9 #include <algorithm>
10 #include <functional>
11 #include <optional>
12 #include <utility>
13
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
17 #include "core/fpdfapi/parser/cpdf_name.h"
18 #include "core/fpdfapi/parser/cpdf_null.h"
19 #include "core/fpdfapi/parser/cpdf_number.h"
20 #include "core/fpdfapi/parser/cpdf_parser.h"
21 #include "core/fpdfapi/parser/cpdf_read_validator.h"
22 #include "core/fpdfapi/parser/cpdf_reference.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcodec/jbig2/JBig2_DocumentContext.h"
27 #include "core/fxcrt/check.h"
28 #include "core/fxcrt/check_op.h"
29 #include "core/fxcrt/containers/contains.h"
30 #include "core/fxcrt/fx_codepage.h"
31 #include "core/fxcrt/scoped_set_insertion.h"
32 #include "core/fxcrt/span.h"
33 #include "core/fxcrt/stl_util.h"
34
35 namespace {
36
// Upper bound on how deep the page tree traversals in this file will recurse
// before giving up on a (likely malformed) document.
const int kMaxPageLevel = 1024;

// The two kinds of dictionaries that may appear in a page tree.
enum class NodeType : bool {
  kBranch,  // /Type /Pages, AKA page tree node.
  kLeaf,    // /Type /Page, AKA page object.
};
43
44 // Note that this function may modify `kid_dict` to correct PDF spec violations.
45 // Same reasoning as CountPages() below.
GetNodeType(RetainPtr<CPDF_Dictionary> kid_dict)46 NodeType GetNodeType(RetainPtr<CPDF_Dictionary> kid_dict) {
47 const ByteString kid_type_value = kid_dict->GetNameFor("Type");
48 if (kid_type_value == "Pages") {
49 return NodeType::kBranch;
50 }
51 if (kid_type_value == "Page") {
52 return NodeType::kLeaf;
53 }
54
55 // Even though /Type is required for page tree nodes and page objects, PDFs
56 // may not have them or have the wrong type. Tolerate these errors and guess
57 // the type. Then fix the in-memory representation.
58 const bool has_kids = kid_dict->KeyExist("Kids");
59 kid_dict->SetNewFor<CPDF_Name>("Type", has_kids ? "Pages" : "Page");
60 return has_kids ? NodeType::kBranch : NodeType::kLeaf;
61 }
62
63 // Returns a value in the range [0, `CPDF_Document::kPageMaxNum`), or nullopt on
64 // error. Note that this function may modify `pages_dict` to correct PDF spec
65 // violations. By normalizing the in-memory representation, other code that
66 // reads the object do not have to deal with the same spec violations again.
67 // If the PDF gets saved, the saved copy will also be more spec-compliant.
CountPages(RetainPtr<CPDF_Dictionary> pages_dict,std::set<RetainPtr<CPDF_Dictionary>> * visited_pages)68 std::optional<int> CountPages(
69 RetainPtr<CPDF_Dictionary> pages_dict,
70 std::set<RetainPtr<CPDF_Dictionary>>* visited_pages) {
71 // Required. See ISO 32000-1:2008 spec, table 29, but tolerate page tree nodes
72 // that violate the spec.
73 int count_from_dict = pages_dict->GetIntegerFor("Count");
74 if (count_from_dict > 0 && count_from_dict < CPDF_Document::kPageMaxNum) {
75 return count_from_dict;
76 }
77
78 RetainPtr<CPDF_Array> kids_array = pages_dict->GetMutableArrayFor("Kids");
79 if (!kids_array) {
80 return 0;
81 }
82
83 int count = 0;
84 for (size_t i = 0; i < kids_array->size(); i++) {
85 RetainPtr<CPDF_Dictionary> kid_dict = kids_array->GetMutableDictAt(i);
86 if (!kid_dict || pdfium::Contains(*visited_pages, kid_dict)) {
87 continue;
88 }
89
90 NodeType kid_type = GetNodeType(kid_dict);
91 if (kid_type == NodeType::kBranch) {
92 // Use |visited_pages| to help detect circular references of pages.
93 ScopedSetInsertion<RetainPtr<CPDF_Dictionary>> local_add(visited_pages,
94 kid_dict);
95 std::optional<int> local_count =
96 CountPages(std::move(kid_dict), visited_pages);
97 if (!local_count.has_value()) {
98 return std::nullopt; // Propagate error.
99 }
100 count += local_count.value();
101 } else {
102 CHECK_EQ(kid_type, NodeType::kLeaf);
103 count++;
104 }
105
106 if (count >= CPDF_Document::kPageMaxNum) {
107 return std::nullopt; // Error: too many pages.
108 }
109 }
110 // Fix the in-memory representation for page tree nodes that violate the spec.
111 pages_dict->SetNewFor<CPDF_Number>("Count", count);
112 return count;
113 }
114
// Recursively searches the page tree rooted at `pNode` for the object numbered
// `objnum`.
//
// `skip_count` - in/out: number of leading pages that need not be examined.
//                Subtrees whose /Count fits within it are skipped wholesale
//                and it is reduced accordingly.
// `index`      - in/out: number of pages passed over so far.
// `level`      - current recursion depth; the search stops descending once it
//                reaches `kMaxPageLevel`.
//
// Returns the zero-based page index on success, or -1 if `objnum` was not
// found in this subtree.
int FindPageIndex(const CPDF_Dictionary* pNode,
                  uint32_t* skip_count,
                  uint32_t objnum,
                  int* index,
                  int level) {
  // A node without /Kids is treated as a single page.
  if (!pNode->KeyExist("Kids")) {
    if (objnum == pNode->GetObjNum())
      return *index;

    if (*skip_count != 0)
      (*skip_count)--;

    (*index)++;
    return -1;
  }

  // /Kids exists but is not an array.
  RetainPtr<const CPDF_Array> pKidList = pNode->GetArrayFor("Kids");
  if (!pKidList)
    return -1;

  if (level >= kMaxPageLevel)
    return -1;

  // Note: a negative /Count becomes a very large size_t here, so neither the
  // skip branch nor the fast path below is taken for it.
  size_t count = pNode->GetIntegerFor("Count");
  if (count <= *skip_count) {
    // The whole subtree lies within the pages to skip; step over it.
    (*skip_count) -= count;
    (*index) += count;
    return -1;
  }

  // Fast path: when /Count equals the number of kids, look for a direct
  // reference to `objnum` among the kids and use its position as the offset.
  if (count && count == pKidList->size()) {
    for (size_t i = 0; i < count; i++) {
      RetainPtr<const CPDF_Reference> pKid =
          ToReference(pKidList->GetObjectAt(i));
      if (pKid && pKid->GetRefObjNum() == objnum)
        return static_cast<int>(*index + i);
    }
  }

  // General case: descend into each kid, ignoring non-dictionary kids and a
  // node that lists itself as its own kid.
  for (size_t i = 0; i < pKidList->size(); i++) {
    RetainPtr<const CPDF_Dictionary> pKid = pKidList->GetDictAt(i);
    if (!pKid || pKid == pNode)
      continue;

    int found_index =
        FindPageIndex(pKid.Get(), skip_count, objnum, index, level + 1);
    if (found_index >= 0)
      return found_index;
  }
  return -1;
}
166
167 } // namespace
168
// Takes ownership of the render-data and page-data helpers and tells both of
// them which document they belong to. Both pointers are dereferenced
// unconditionally, so callers must pass non-null values.
CPDF_Document::CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
                             std::unique_ptr<PageDataIface> pPageData)
    : m_pDocRender(std::move(pRenderData)),
      m_pDocPage(std::move(pPageData)),
      m_StockFontClearer(m_pDocPage.get()) {
  m_pDocRender->SetDocument(this);
  m_pDocPage->SetDocument(this);
}
177
CPDF_Document::~CPDF_Document() {
  // Explicitly reset `m_pExtension` here rather than relying on implicit
  // member destruction. The intent is to be absolutely certain that
  // `m_pExtension` is null before the extension is destroyed, to avoid
  // re-entering it while it is being destroyed. clang seems to already do this
  // for us, but the C++ standards seem to indicate the opposite.
  m_pExtension.reset();
}
185
186 // static
IsValidPageObject(const CPDF_Object * obj)187 bool CPDF_Document::IsValidPageObject(const CPDF_Object* obj) {
188 // See ISO 32000-1:2008 spec, table 30.
189 return ValidateDictType(ToDictionary(obj), "Page");
190 }
191
ParseIndirectObject(uint32_t objnum)192 RetainPtr<CPDF_Object> CPDF_Document::ParseIndirectObject(uint32_t objnum) {
193 return m_pParser ? m_pParser->ParseIndirectObject(objnum) : nullptr;
194 }
195
TryInit()196 bool CPDF_Document::TryInit() {
197 SetLastObjNum(m_pParser->GetLastObjNum());
198
199 RetainPtr<CPDF_Object> pRootObj =
200 GetOrParseIndirectObject(m_pParser->GetRootObjNum());
201 if (pRootObj)
202 m_pRootDict = pRootObj->GetMutableDict();
203
204 LoadPages();
205 return GetRoot() && GetPageCount() > 0;
206 }
207
LoadDoc(RetainPtr<IFX_SeekableReadStream> pFileAccess,const ByteString & password)208 CPDF_Parser::Error CPDF_Document::LoadDoc(
209 RetainPtr<IFX_SeekableReadStream> pFileAccess,
210 const ByteString& password) {
211 if (!m_pParser)
212 SetParser(std::make_unique<CPDF_Parser>(this));
213
214 return HandleLoadResult(
215 m_pParser->StartParse(std::move(pFileAccess), password));
216 }
217
LoadLinearizedDoc(RetainPtr<CPDF_ReadValidator> validator,const ByteString & password)218 CPDF_Parser::Error CPDF_Document::LoadLinearizedDoc(
219 RetainPtr<CPDF_ReadValidator> validator,
220 const ByteString& password) {
221 if (!m_pParser)
222 SetParser(std::make_unique<CPDF_Parser>(this));
223
224 return HandleLoadResult(
225 m_pParser->StartLinearizedParse(std::move(validator), password));
226 }
227
LoadPages()228 void CPDF_Document::LoadPages() {
229 const CPDF_LinearizedHeader* linearized_header =
230 m_pParser->GetLinearizedHeader();
231 if (!linearized_header) {
232 m_PageList.resize(RetrievePageCount());
233 return;
234 }
235
236 uint32_t objnum = linearized_header->GetFirstPageObjNum();
237 if (!IsValidPageObject(GetOrParseIndirectObject(objnum).Get())) {
238 m_PageList.resize(RetrievePageCount());
239 return;
240 }
241
242 uint32_t first_page_num = linearized_header->GetFirstPageNo();
243 uint32_t page_count = linearized_header->GetPageCount();
244 DCHECK(first_page_num < page_count);
245 m_PageList.resize(page_count);
246 m_PageList[first_page_num] = objnum;
247 }
248
// Resumable depth-first walk of the page tree, used to locate page `iPage`.
//
// `m_pTreeTraversal` acts as a stack of (page tree node, index of next kid to
// visit) pairs; `level` is the position in that stack handled by this call.
// `*nPagesToGo` is the number of pages still to be visited before `iPage` is
// reached and is decremented as pages are passed. Object numbers of pages
// encountered on the way are recorded in `m_PageList`.
//
// Returns the page dictionary reached when `*nPagesToGo` drops to zero, or
// null if that page was not reached (including when the depth limit is hit).
RetainPtr<CPDF_Dictionary> CPDF_Document::TraversePDFPages(int iPage,
                                                           int* nPagesToGo,
                                                           size_t level) {
  if (*nPagesToGo < 0 || m_bReachedMaxPageLevel)
    return nullptr;

  RetainPtr<CPDF_Dictionary> pPages = m_pTreeTraversal[level].first;
  RetainPtr<CPDF_Array> pKidList = pPages->GetMutableArrayFor("Kids");
  if (!pKidList) {
    // No /Kids array: treat this node itself as a single page.
    m_pTreeTraversal.pop_back();
    if (*nPagesToGo != 1)
      return nullptr;
    m_PageList[iPage] = pPages->GetObjNum();
    return pPages;
  }
  if (level >= kMaxPageLevel) {
    // Tree is too deep; remember that so subsequent calls bail out early.
    m_pTreeTraversal.pop_back();
    m_bReachedMaxPageLevel = true;
    return nullptr;
  }
  RetainPtr<CPDF_Dictionary> page;
  // Resume from the kid index stored for this level.
  for (size_t i = m_pTreeTraversal[level].second; i < pKidList->size(); i++) {
    if (*nPagesToGo == 0)
      break;
    pKidList->ConvertToIndirectObjectAt(i, this);
    RetainPtr<CPDF_Dictionary> pKid = pKidList->GetMutableDictAt(i);
    if (!pKid) {
      // A kid that is not a dictionary still uses up one page slot.
      (*nPagesToGo)--;
      m_pTreeTraversal[level].second++;
      continue;
    }
    if (pKid == pPages) {
      // Skip a node that lists itself as its own kid.
      m_pTreeTraversal[level].second++;
      continue;
    }
    if (!pKid->KeyExist("Kids")) {
      // Leaf: record its object number at the page index it corresponds to.
      m_PageList[iPage - (*nPagesToGo) + 1] = pKid->GetObjNum();
      (*nPagesToGo)--;
      m_pTreeTraversal[level].second++;
      if (*nPagesToGo == 0) {
        page = std::move(pKid);
        break;
      }
    } else {
      // If the vector has size level+1, the child is not in yet
      if (m_pTreeTraversal.size() == level + 1)
        m_pTreeTraversal.emplace_back(std::move(pKid), 0);
      // Now m_pTreeTraversal[level+1] should exist and be equal to pKid.
      RetainPtr<CPDF_Dictionary> pPageKid =
          TraversePDFPages(iPage, nPagesToGo, level + 1);
      // Check if child was completely processed, i.e. it popped itself out
      if (m_pTreeTraversal.size() == level + 1)
        m_pTreeTraversal[level].second++;
      // If child did not finish, no pages to go, or max level reached, end
      if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0 ||
          m_bReachedMaxPageLevel) {
        page = std::move(pPageKid);
        break;
      }
    }
  }
  // All kids of this node have been visited; drop it from the stack.
  if (m_pTreeTraversal[level].second == pKidList->size())
    m_pTreeTraversal.pop_back();
  return page;
}
314
ResetTraversal()315 void CPDF_Document::ResetTraversal() {
316 m_iNextPageToTraverse = 0;
317 m_bReachedMaxPageLevel = false;
318 m_pTreeTraversal.clear();
319 }
320
// Installs `pParser` as this document's parser. The DCHECK asserts that no
// parser has been installed before.
void CPDF_Document::SetParser(std::unique_ptr<CPDF_Parser> pParser) {
  DCHECK(!m_pParser);
  m_pParser = std::move(pParser);
}
325
HandleLoadResult(CPDF_Parser::Error error)326 CPDF_Parser::Error CPDF_Document::HandleLoadResult(CPDF_Parser::Error error) {
327 if (error == CPDF_Parser::SUCCESS)
328 m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
329 return error;
330 }
331
GetPagesDict() const332 RetainPtr<const CPDF_Dictionary> CPDF_Document::GetPagesDict() const {
333 const CPDF_Dictionary* pRoot = GetRoot();
334 return pRoot ? pRoot->GetDictFor("Pages") : nullptr;
335 }
336
GetMutablePagesDict()337 RetainPtr<CPDF_Dictionary> CPDF_Document::GetMutablePagesDict() {
338 return pdfium::WrapRetain(
339 const_cast<CPDF_Dictionary*>(this->GetPagesDict().Get()));
340 }
341
IsPageLoaded(int iPage) const342 bool CPDF_Document::IsPageLoaded(int iPage) const {
343 return !!m_PageList[iPage];
344 }
345
GetPageDictionary(int iPage)346 RetainPtr<const CPDF_Dictionary> CPDF_Document::GetPageDictionary(int iPage) {
347 if (!fxcrt::IndexInBounds(m_PageList, iPage))
348 return nullptr;
349
350 const uint32_t objnum = m_PageList[iPage];
351 if (objnum) {
352 RetainPtr<CPDF_Dictionary> result =
353 ToDictionary(GetOrParseIndirectObject(objnum));
354 if (result)
355 return result;
356 }
357
358 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
359 if (!pPages)
360 return nullptr;
361
362 if (m_pTreeTraversal.empty()) {
363 ResetTraversal();
364 m_pTreeTraversal.emplace_back(std::move(pPages), 0);
365 }
366 int nPagesToGo = iPage - m_iNextPageToTraverse + 1;
367 RetainPtr<CPDF_Dictionary> pPage = TraversePDFPages(iPage, &nPagesToGo, 0);
368 m_iNextPageToTraverse = iPage + 1;
369 return pPage;
370 }
371
GetMutablePageDictionary(int iPage)372 RetainPtr<CPDF_Dictionary> CPDF_Document::GetMutablePageDictionary(int iPage) {
373 return pdfium::WrapRetain(
374 const_cast<CPDF_Dictionary*>(GetPageDictionary(iPage).Get()));
375 }
376
// Records `objNum` as the object number of page `iPage`. No bounds checking is
// done here; `iPage` must be a valid index into `m_PageList`.
void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
  m_PageList[iPage] = objNum;
}
380
GetOrCreateCodecContext()381 JBig2_DocumentContext* CPDF_Document::GetOrCreateCodecContext() {
382 if (!m_pCodecContext)
383 m_pCodecContext = std::make_unique<JBig2_DocumentContext>();
384 return m_pCodecContext.get();
385 }
386
CreateModifiedAPStream(RetainPtr<CPDF_Dictionary> dict)387 RetainPtr<CPDF_Stream> CPDF_Document::CreateModifiedAPStream(
388 RetainPtr<CPDF_Dictionary> dict) {
389 auto stream = NewIndirect<CPDF_Stream>(std::move(dict));
390 m_ModifiedAPStreamIDs.insert(stream->GetObjNum());
391 return stream;
392 }
393
IsModifiedAPStream(const CPDF_Stream * stream) const394 bool CPDF_Document::IsModifiedAPStream(const CPDF_Stream* stream) const {
395 return stream && pdfium::Contains(m_ModifiedAPStreamIDs, stream->GetObjNum());
396 }
397
GetPageIndex(uint32_t objnum)398 int CPDF_Document::GetPageIndex(uint32_t objnum) {
399 uint32_t skip_count = 0;
400 bool bSkipped = false;
401 for (uint32_t i = 0; i < m_PageList.size(); ++i) {
402 if (m_PageList[i] == objnum)
403 return i;
404
405 if (!bSkipped && m_PageList[i] == 0) {
406 skip_count = i;
407 bSkipped = true;
408 }
409 }
410 RetainPtr<const CPDF_Dictionary> pPages = GetPagesDict();
411 if (!pPages)
412 return -1;
413
414 int start_index = 0;
415 int found_index = FindPageIndex(pPages, &skip_count, objnum, &start_index, 0);
416
417 // Corrupt page tree may yield out-of-range results.
418 if (!fxcrt::IndexInBounds(m_PageList, found_index))
419 return -1;
420
421 // Only update |m_PageList| when |objnum| points to a /Page object.
422 if (IsValidPageObject(GetOrParseIndirectObject(objnum).Get()))
423 m_PageList[found_index] = objnum;
424 return found_index;
425 }
426
// Returns the number of entries in `m_PageList`, which is sized by LoadPages()
// and kept up to date by InsertNewPage() and DeletePage().
int CPDF_Document::GetPageCount() const {
  return fxcrt::CollectionSize<int>(m_PageList);
}
430
RetrievePageCount()431 int CPDF_Document::RetrievePageCount() {
432 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
433 if (!pPages)
434 return 0;
435
436 if (!pPages->KeyExist("Kids"))
437 return 1;
438
439 std::set<RetainPtr<CPDF_Dictionary>> visited_pages = {pPages};
440 return CountPages(std::move(pPages), &visited_pages).value_or(0);
441 }
442
GetUserPermissions(bool get_owner_perms) const443 uint32_t CPDF_Document::GetUserPermissions(bool get_owner_perms) const {
444 return m_pParser ? m_pParser->GetPermissions(get_owner_perms) : 0;
445 }
446
// Forwards to the page-data helper (`m_pDocPage`) to obtain the stream
// accessor for `pFontStream`.
RetainPtr<CPDF_StreamAcc> CPDF_Document::GetFontFileStreamAcc(
    RetainPtr<const CPDF_Stream> pFontStream) {
  return m_pDocPage->GetFontFileStreamAcc(std::move(pFontStream));
}
451
// Hands `pStreamAcc` over to the page-data helper (`m_pDocPage`), which
// decides whether to purge it.
void CPDF_Document::MaybePurgeFontFileStreamAcc(
    RetainPtr<CPDF_StreamAcc>&& pStreamAcc) {
  m_pDocPage->MaybePurgeFontFileStreamAcc(std::move(pStreamAcc));
}
456
// Forwards to the page-data helper (`m_pDocPage`), which decides whether to
// purge the image identified by `objnum`.
void CPDF_Document::MaybePurgeImage(uint32_t objnum) {
  m_pDocPage->MaybePurgeImage(objnum);
}
460
CreateNewDoc()461 void CPDF_Document::CreateNewDoc() {
462 DCHECK(!m_pRootDict);
463 DCHECK(!m_pInfoDict);
464 m_pRootDict = NewIndirect<CPDF_Dictionary>();
465 m_pRootDict->SetNewFor<CPDF_Name>("Type", "Catalog");
466
467 auto pPages = NewIndirect<CPDF_Dictionary>();
468 pPages->SetNewFor<CPDF_Name>("Type", "Pages");
469 pPages->SetNewFor<CPDF_Number>("Count", 0);
470 pPages->SetNewFor<CPDF_Array>("Kids");
471 m_pRootDict->SetNewFor<CPDF_Reference>("Pages", this, pPages->GetObjNum());
472 m_pInfoDict = NewIndirect<CPDF_Dictionary>();
473 }
474
CreateNewPage(int iPage)475 RetainPtr<CPDF_Dictionary> CPDF_Document::CreateNewPage(int iPage) {
476 auto pDict = NewIndirect<CPDF_Dictionary>();
477 pDict->SetNewFor<CPDF_Name>("Type", "Page");
478 uint32_t dwObjNum = pDict->GetObjNum();
479 if (!InsertNewPage(iPage, pDict)) {
480 DeleteIndirectObject(dwObjNum);
481 return nullptr;
482 }
483 return pDict;
484 }
485
InsertDeletePDFPage(RetainPtr<CPDF_Dictionary> pages_dict,int pages_to_go,RetainPtr<CPDF_Dictionary> page_dict,bool is_insert,std::set<RetainPtr<CPDF_Dictionary>> * visited)486 bool CPDF_Document::InsertDeletePDFPage(
487 RetainPtr<CPDF_Dictionary> pages_dict,
488 int pages_to_go,
489 RetainPtr<CPDF_Dictionary> page_dict,
490 bool is_insert,
491 std::set<RetainPtr<CPDF_Dictionary>>* visited) {
492 RetainPtr<CPDF_Array> kids_list = pages_dict->GetMutableArrayFor("Kids");
493 if (!kids_list) {
494 return false;
495 }
496
497 for (size_t i = 0; i < kids_list->size(); i++) {
498 RetainPtr<CPDF_Dictionary> kid_dict = kids_list->GetMutableDictAt(i);
499 NodeType kid_type = GetNodeType(kid_dict);
500 if (kid_type == NodeType::kLeaf) {
501 if (pages_to_go != 0) {
502 pages_to_go--;
503 continue;
504 }
505 if (is_insert) {
506 kids_list->InsertNewAt<CPDF_Reference>(i, this, page_dict->GetObjNum());
507 page_dict->SetNewFor<CPDF_Reference>("Parent", this,
508 pages_dict->GetObjNum());
509 } else {
510 kids_list->RemoveAt(i);
511 }
512 pages_dict->SetNewFor<CPDF_Number>(
513 "Count", pages_dict->GetIntegerFor("Count") + (is_insert ? 1 : -1));
514 ResetTraversal();
515 break;
516 }
517
518 CHECK_EQ(kid_type, NodeType::kBranch);
519 int page_count = kid_dict->GetIntegerFor("Count");
520 if (pages_to_go >= page_count) {
521 pages_to_go -= page_count;
522 continue;
523 }
524 if (pdfium::Contains(*visited, kid_dict)) {
525 return false;
526 }
527
528 ScopedSetInsertion<RetainPtr<CPDF_Dictionary>> insertion(visited, kid_dict);
529 if (!InsertDeletePDFPage(std::move(kid_dict), pages_to_go, page_dict,
530 is_insert, visited)) {
531 return false;
532 }
533 pages_dict->SetNewFor<CPDF_Number>(
534 "Count", pages_dict->GetIntegerFor("Count") + (is_insert ? 1 : -1));
535 break;
536 }
537 return true;
538 }
539
InsertNewPage(int iPage,RetainPtr<CPDF_Dictionary> pPageDict)540 bool CPDF_Document::InsertNewPage(int iPage,
541 RetainPtr<CPDF_Dictionary> pPageDict) {
542 RetainPtr<CPDF_Dictionary> pRoot = GetMutableRoot();
543 if (!pRoot)
544 return false;
545
546 RetainPtr<CPDF_Dictionary> pPages = pRoot->GetMutableDictFor("Pages");
547 if (!pPages)
548 return false;
549
550 int nPages = GetPageCount();
551 if (iPage < 0 || iPage > nPages)
552 return false;
553
554 if (iPage == nPages) {
555 RetainPtr<CPDF_Array> pPagesList = pPages->GetOrCreateArrayFor("Kids");
556 pPagesList->AppendNew<CPDF_Reference>(this, pPageDict->GetObjNum());
557 pPages->SetNewFor<CPDF_Number>("Count", nPages + 1);
558 pPageDict->SetNewFor<CPDF_Reference>("Parent", this, pPages->GetObjNum());
559 ResetTraversal();
560 } else {
561 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
562 if (!InsertDeletePDFPage(std::move(pPages), iPage, pPageDict, true, &stack))
563 return false;
564 }
565 m_PageList.insert(m_PageList.begin() + iPage, pPageDict->GetObjNum());
566 return true;
567 }
568
GetInfo()569 RetainPtr<CPDF_Dictionary> CPDF_Document::GetInfo() {
570 if (m_pInfoDict)
571 return m_pInfoDict;
572
573 if (!m_pParser)
574 return nullptr;
575
576 uint32_t info_obj_num = m_pParser->GetInfoObjNum();
577 if (info_obj_num == 0)
578 return nullptr;
579
580 auto ref = pdfium::MakeRetain<CPDF_Reference>(this, info_obj_num);
581 m_pInfoDict = ToDictionary(ref->GetMutableDirect());
582 return m_pInfoDict;
583 }
584
GetFileIdentifier() const585 RetainPtr<const CPDF_Array> CPDF_Document::GetFileIdentifier() const {
586 return m_pParser ? m_pParser->GetIDArray() : nullptr;
587 }
588
DeletePage(int iPage)589 uint32_t CPDF_Document::DeletePage(int iPage) {
590 RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
591 if (!pPages) {
592 return 0;
593 }
594
595 int nPages = pPages->GetIntegerFor("Count");
596 if (iPage < 0 || iPage >= nPages) {
597 return 0;
598 }
599
600 RetainPtr<const CPDF_Dictionary> page_dict = GetPageDictionary(iPage);
601 if (!page_dict) {
602 return 0;
603 }
604
605 std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
606 if (!InsertDeletePDFPage(std::move(pPages), iPage, nullptr, false, &stack)) {
607 return 0;
608 }
609
610 m_PageList.erase(m_PageList.begin() + iPage);
611 return page_dict->GetObjNum();
612 }
613
SetPageToNullObject(uint32_t page_obj_num)614 void CPDF_Document::SetPageToNullObject(uint32_t page_obj_num) {
615 if (!page_obj_num || m_PageList.empty()) {
616 return;
617 }
618
619 // Load all pages so `m_PageList` has all the object numbers.
620 for (size_t i = 0; i < m_PageList.size(); ++i) {
621 GetPageDictionary(i);
622 }
623
624 if (pdfium::Contains(m_PageList, page_obj_num)) {
625 return;
626 }
627
628 // If `page_dict` is no longer in the page tree, replace it with an object of
629 // type null.
630 //
631 // Delete the object first from this container, so the conditional in the
632 // replacement call always evaluates to true.
633 DeleteIndirectObject(page_obj_num);
634 const bool replaced = ReplaceIndirectObjectIfHigherGeneration(
635 page_obj_num, pdfium::MakeRetain<CPDF_Null>());
636 CHECK(replaced);
637 }
638
// Test-only hook: replaces the document's root dictionary with `root`.
void CPDF_Document::SetRootForTesting(RetainPtr<CPDF_Dictionary> root) {
  m_pRootDict = std::move(root);
}
642
MovePages(pdfium::span<const int> page_indices,int dest_page_index)643 bool CPDF_Document::MovePages(pdfium::span<const int> page_indices,
644 int dest_page_index) {
645 const CPDF_Dictionary* pages = GetPagesDict();
646 const int num_pages_signed = pages ? pages->GetIntegerFor("Count") : 0;
647 if (num_pages_signed <= 0) {
648 return false;
649 }
650 const size_t num_pages = num_pages_signed;
651
652 // Check the number of pages is in range.
653 if (page_indices.empty() || page_indices.size() > num_pages) {
654 return false;
655 }
656
657 // Check that destination page index is in range.
658 if (dest_page_index < 0 ||
659 static_cast<size_t>(dest_page_index) > num_pages - page_indices.size()) {
660 return false;
661 }
662
663 // Check for if XFA is enabled.
664 Extension* extension = GetExtension();
665 if (extension && extension->ContainsExtensionForm()) {
666 // Don't manipulate XFA PDFs.
667 return false;
668 }
669
670 // Check for duplicate and out-of-range page indices
671 std::set<int> unique_page_indices;
672 // Store the pages that need to be moved. They'll be deleted then reinserted.
673 std::vector<RetainPtr<CPDF_Dictionary>> pages_to_move;
674 pages_to_move.reserve(page_indices.size());
675 // Store the page indices that will be deleted (and moved).
676 std::vector<int> page_indices_to_delete;
677 page_indices_to_delete.reserve(page_indices.size());
678 for (const int page_index : page_indices) {
679 bool inserted = unique_page_indices.insert(page_index).second;
680 if (!inserted) {
681 // Duplicate page index found
682 return false;
683 }
684 RetainPtr<CPDF_Dictionary> page = GetMutablePageDictionary(page_index);
685 if (!page) {
686 // Page not found, index might be out of range.
687 return false;
688 }
689 pages_to_move.push_back(std::move(page));
690 page_indices_to_delete.push_back(page_index);
691 }
692
693 // Sort the page indices to be deleted in descending order.
694 std::sort(page_indices_to_delete.begin(), page_indices_to_delete.end(),
695 std::greater<int>());
696 // Delete the pages in descending order.
697 if (extension) {
698 for (int page_index : page_indices_to_delete) {
699 extension->DeletePage(page_index);
700 }
701 } else {
702 for (int page_index : page_indices_to_delete) {
703 DeletePage(page_index);
704 }
705 }
706
707 // Insert the deleted pages back into the document at the destination page
708 // index.
709 for (size_t i = 0; i < pages_to_move.size(); ++i) {
710 if (!InsertNewPage(i + dest_page_index, pages_to_move[i])) {
711 // Fail in an indeterminate state.
712 return false;
713 }
714 }
715
716 return true;
717 }
718
// Test-only hook: resizes `m_PageList` to `size` entries.
void CPDF_Document::ResizePageListForTesting(size_t size) {
  m_PageList.resize(size);
}
722
// Remembers `pPageData` so the destructor can call ClearStockFont() on it.
CPDF_Document::StockFontClearer::StockFontClearer(
    CPDF_Document::PageDataIface* pPageData)
    : m_pPageData(pPageData) {}
726
// Calls ClearStockFont() on the page data supplied at construction. The
// pointer is dereferenced unconditionally, so it must still be valid here.
CPDF_Document::StockFontClearer::~StockFontClearer() {
  m_pPageData->ClearStockFont();
}
730
// Out-of-line defaulted constructors and destructors for the two helper
// interfaces.
CPDF_Document::PageDataIface::PageDataIface() = default;

CPDF_Document::PageDataIface::~PageDataIface() = default;

CPDF_Document::RenderDataIface::RenderDataIface() = default;

CPDF_Document::RenderDataIface::~RenderDataIface() = default;
738