• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_document.h"
8 
9 #include <algorithm>
10 #include <functional>
11 #include <optional>
12 #include <utility>
13 
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
17 #include "core/fpdfapi/parser/cpdf_name.h"
18 #include "core/fpdfapi/parser/cpdf_null.h"
19 #include "core/fpdfapi/parser/cpdf_number.h"
20 #include "core/fpdfapi/parser/cpdf_parser.h"
21 #include "core/fpdfapi/parser/cpdf_read_validator.h"
22 #include "core/fpdfapi/parser/cpdf_reference.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcodec/jbig2/JBig2_DocumentContext.h"
27 #include "core/fxcrt/check.h"
28 #include "core/fxcrt/check_op.h"
29 #include "core/fxcrt/containers/contains.h"
30 #include "core/fxcrt/fx_codepage.h"
31 #include "core/fxcrt/scoped_set_insertion.h"
32 #include "core/fxcrt/span.h"
33 #include "core/fxcrt/stl_util.h"
34 
35 namespace {
36 
// Deepest page tree level the traversal helpers in this file will descend to.
constexpr int kMaxPageLevel = 1024;
38 
// Kind of node found in the page tree.
enum class NodeType : bool {
  kBranch = false,  // /Type /Pages: an interior page tree node.
  kLeaf = true,     // /Type /Page: a page object.
};
43 
44 // Note that this function may modify `kid_dict` to correct PDF spec violations.
45 // Same reasoning as CountPages() below.
GetNodeType(RetainPtr<CPDF_Dictionary> kid_dict)46 NodeType GetNodeType(RetainPtr<CPDF_Dictionary> kid_dict) {
47   const ByteString kid_type_value = kid_dict->GetNameFor("Type");
48   if (kid_type_value == "Pages") {
49     return NodeType::kBranch;
50   }
51   if (kid_type_value == "Page") {
52     return NodeType::kLeaf;
53   }
54 
55   // Even though /Type is required for page tree nodes and page objects, PDFs
56   // may not have them or have the wrong type. Tolerate these errors and guess
57   // the type. Then fix the in-memory representation.
58   const bool has_kids = kid_dict->KeyExist("Kids");
59   kid_dict->SetNewFor<CPDF_Name>("Type", has_kids ? "Pages" : "Page");
60   return has_kids ? NodeType::kBranch : NodeType::kLeaf;
61 }
62 
63 // Returns a value in the range [0, `CPDF_Document::kPageMaxNum`), or nullopt on
64 // error. Note that this function may modify `pages_dict` to correct PDF spec
65 // violations. By normalizing the in-memory representation, other code that
66 // reads the object do not have to deal with the same spec violations again.
67 // If the PDF gets saved, the saved copy will also be more spec-compliant.
CountPages(RetainPtr<CPDF_Dictionary> pages_dict,std::set<RetainPtr<CPDF_Dictionary>> * visited_pages)68 std::optional<int> CountPages(
69     RetainPtr<CPDF_Dictionary> pages_dict,
70     std::set<RetainPtr<CPDF_Dictionary>>* visited_pages) {
71   // Required. See ISO 32000-1:2008 spec, table 29, but tolerate page tree nodes
72   // that violate the spec.
73   int count_from_dict = pages_dict->GetIntegerFor("Count");
74   if (count_from_dict > 0 && count_from_dict < CPDF_Document::kPageMaxNum) {
75     return count_from_dict;
76   }
77 
78   RetainPtr<CPDF_Array> kids_array = pages_dict->GetMutableArrayFor("Kids");
79   if (!kids_array) {
80     return 0;
81   }
82 
83   int count = 0;
84   for (size_t i = 0; i < kids_array->size(); i++) {
85     RetainPtr<CPDF_Dictionary> kid_dict = kids_array->GetMutableDictAt(i);
86     if (!kid_dict || pdfium::Contains(*visited_pages, kid_dict)) {
87       continue;
88     }
89 
90     NodeType kid_type = GetNodeType(kid_dict);
91     if (kid_type == NodeType::kBranch) {
92       // Use |visited_pages| to help detect circular references of pages.
93       ScopedSetInsertion<RetainPtr<CPDF_Dictionary>> local_add(visited_pages,
94                                                                kid_dict);
95       std::optional<int> local_count =
96           CountPages(std::move(kid_dict), visited_pages);
97       if (!local_count.has_value()) {
98         return std::nullopt;  // Propagate error.
99       }
100       count += local_count.value();
101     } else {
102       CHECK_EQ(kid_type, NodeType::kLeaf);
103       count++;
104     }
105 
106     if (count >= CPDF_Document::kPageMaxNum) {
107       return std::nullopt;  // Error: too many pages.
108     }
109   }
110   // Fix the in-memory representation for page tree nodes that violate the spec.
111   pages_dict->SetNewFor<CPDF_Number>("Count", count);
112   return count;
113 }
114 
FindPageIndex(const CPDF_Dictionary * pNode,uint32_t * skip_count,uint32_t objnum,int * index,int level)115 int FindPageIndex(const CPDF_Dictionary* pNode,
116                   uint32_t* skip_count,
117                   uint32_t objnum,
118                   int* index,
119                   int level) {
120   if (!pNode->KeyExist("Kids")) {
121     if (objnum == pNode->GetObjNum())
122       return *index;
123 
124     if (*skip_count != 0)
125       (*skip_count)--;
126 
127     (*index)++;
128     return -1;
129   }
130 
131   RetainPtr<const CPDF_Array> pKidList = pNode->GetArrayFor("Kids");
132   if (!pKidList)
133     return -1;
134 
135   if (level >= kMaxPageLevel)
136     return -1;
137 
138   size_t count = pNode->GetIntegerFor("Count");
139   if (count <= *skip_count) {
140     (*skip_count) -= count;
141     (*index) += count;
142     return -1;
143   }
144 
145   if (count && count == pKidList->size()) {
146     for (size_t i = 0; i < count; i++) {
147       RetainPtr<const CPDF_Reference> pKid =
148           ToReference(pKidList->GetObjectAt(i));
149       if (pKid && pKid->GetRefObjNum() == objnum)
150         return static_cast<int>(*index + i);
151     }
152   }
153 
154   for (size_t i = 0; i < pKidList->size(); i++) {
155     RetainPtr<const CPDF_Dictionary> pKid = pKidList->GetDictAt(i);
156     if (!pKid || pKid == pNode)
157       continue;
158 
159     int found_index =
160         FindPageIndex(pKid.Get(), skip_count, objnum, index, level + 1);
161     if (found_index >= 0)
162       return found_index;
163   }
164   return -1;
165 }
166 
167 }  // namespace
168 
CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,std::unique_ptr<PageDataIface> pPageData)169 CPDF_Document::CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
170                              std::unique_ptr<PageDataIface> pPageData)
171     : m_pDocRender(std::move(pRenderData)),
172       m_pDocPage(std::move(pPageData)),
173       m_StockFontClearer(m_pDocPage.get()) {
174   m_pDocRender->SetDocument(this);
175   m_pDocPage->SetDocument(this);
176 }
177 
~CPDF_Document()178 CPDF_Document::~CPDF_Document() {
179   // Be absolutely certain that |m_pExtension| is null before destroying
180   // the extension, to avoid re-entering it while being destroyed. clang
181   // seems to already do this for us, but the C++ standards seem to
182   // indicate the opposite.
183   m_pExtension.reset();
184 }
185 
186 // static
IsValidPageObject(const CPDF_Object * obj)187 bool CPDF_Document::IsValidPageObject(const CPDF_Object* obj) {
188   // See ISO 32000-1:2008 spec, table 30.
189   return ValidateDictType(ToDictionary(obj), "Page");
190 }
191 
ParseIndirectObject(uint32_t objnum)192 RetainPtr<CPDF_Object> CPDF_Document::ParseIndirectObject(uint32_t objnum) {
193   return m_pParser ? m_pParser->ParseIndirectObject(objnum) : nullptr;
194 }
195 
TryInit()196 bool CPDF_Document::TryInit() {
197   SetLastObjNum(m_pParser->GetLastObjNum());
198 
199   RetainPtr<CPDF_Object> pRootObj =
200       GetOrParseIndirectObject(m_pParser->GetRootObjNum());
201   if (pRootObj)
202     m_pRootDict = pRootObj->GetMutableDict();
203 
204   LoadPages();
205   return GetRoot() && GetPageCount() > 0;
206 }
207 
LoadDoc(RetainPtr<IFX_SeekableReadStream> pFileAccess,const ByteString & password)208 CPDF_Parser::Error CPDF_Document::LoadDoc(
209     RetainPtr<IFX_SeekableReadStream> pFileAccess,
210     const ByteString& password) {
211   if (!m_pParser)
212     SetParser(std::make_unique<CPDF_Parser>(this));
213 
214   return HandleLoadResult(
215       m_pParser->StartParse(std::move(pFileAccess), password));
216 }
217 
LoadLinearizedDoc(RetainPtr<CPDF_ReadValidator> validator,const ByteString & password)218 CPDF_Parser::Error CPDF_Document::LoadLinearizedDoc(
219     RetainPtr<CPDF_ReadValidator> validator,
220     const ByteString& password) {
221   if (!m_pParser)
222     SetParser(std::make_unique<CPDF_Parser>(this));
223 
224   return HandleLoadResult(
225       m_pParser->StartLinearizedParse(std::move(validator), password));
226 }
227 
LoadPages()228 void CPDF_Document::LoadPages() {
229   const CPDF_LinearizedHeader* linearized_header =
230       m_pParser->GetLinearizedHeader();
231   if (!linearized_header) {
232     m_PageList.resize(RetrievePageCount());
233     return;
234   }
235 
236   uint32_t objnum = linearized_header->GetFirstPageObjNum();
237   if (!IsValidPageObject(GetOrParseIndirectObject(objnum).Get())) {
238     m_PageList.resize(RetrievePageCount());
239     return;
240   }
241 
242   uint32_t first_page_num = linearized_header->GetFirstPageNo();
243   uint32_t page_count = linearized_header->GetPageCount();
244   DCHECK(first_page_num < page_count);
245   m_PageList.resize(page_count);
246   m_PageList[first_page_num] = objnum;
247 }
248 
TraversePDFPages(int iPage,int * nPagesToGo,size_t level)249 RetainPtr<CPDF_Dictionary> CPDF_Document::TraversePDFPages(int iPage,
250                                                            int* nPagesToGo,
251                                                            size_t level) {
252   if (*nPagesToGo < 0 || m_bReachedMaxPageLevel)
253     return nullptr;
254 
255   RetainPtr<CPDF_Dictionary> pPages = m_pTreeTraversal[level].first;
256   RetainPtr<CPDF_Array> pKidList = pPages->GetMutableArrayFor("Kids");
257   if (!pKidList) {
258     m_pTreeTraversal.pop_back();
259     if (*nPagesToGo != 1)
260       return nullptr;
261     m_PageList[iPage] = pPages->GetObjNum();
262     return pPages;
263   }
264   if (level >= kMaxPageLevel) {
265     m_pTreeTraversal.pop_back();
266     m_bReachedMaxPageLevel = true;
267     return nullptr;
268   }
269   RetainPtr<CPDF_Dictionary> page;
270   for (size_t i = m_pTreeTraversal[level].second; i < pKidList->size(); i++) {
271     if (*nPagesToGo == 0)
272       break;
273     pKidList->ConvertToIndirectObjectAt(i, this);
274     RetainPtr<CPDF_Dictionary> pKid = pKidList->GetMutableDictAt(i);
275     if (!pKid) {
276       (*nPagesToGo)--;
277       m_pTreeTraversal[level].second++;
278       continue;
279     }
280     if (pKid == pPages) {
281       m_pTreeTraversal[level].second++;
282       continue;
283     }
284     if (!pKid->KeyExist("Kids")) {
285       m_PageList[iPage - (*nPagesToGo) + 1] = pKid->GetObjNum();
286       (*nPagesToGo)--;
287       m_pTreeTraversal[level].second++;
288       if (*nPagesToGo == 0) {
289         page = std::move(pKid);
290         break;
291       }
292     } else {
293       // If the vector has size level+1, the child is not in yet
294       if (m_pTreeTraversal.size() == level + 1)
295         m_pTreeTraversal.emplace_back(std::move(pKid), 0);
296       // Now m_pTreeTraversal[level+1] should exist and be equal to pKid.
297       RetainPtr<CPDF_Dictionary> pPageKid =
298           TraversePDFPages(iPage, nPagesToGo, level + 1);
299       // Check if child was completely processed, i.e. it popped itself out
300       if (m_pTreeTraversal.size() == level + 1)
301         m_pTreeTraversal[level].second++;
302       // If child did not finish, no pages to go, or max level reached, end
303       if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0 ||
304           m_bReachedMaxPageLevel) {
305         page = std::move(pPageKid);
306         break;
307       }
308     }
309   }
310   if (m_pTreeTraversal[level].second == pKidList->size())
311     m_pTreeTraversal.pop_back();
312   return page;
313 }
314 
ResetTraversal()315 void CPDF_Document::ResetTraversal() {
316   m_iNextPageToTraverse = 0;
317   m_bReachedMaxPageLevel = false;
318   m_pTreeTraversal.clear();
319 }
320 
SetParser(std::unique_ptr<CPDF_Parser> pParser)321 void CPDF_Document::SetParser(std::unique_ptr<CPDF_Parser> pParser) {
322   DCHECK(!m_pParser);
323   m_pParser = std::move(pParser);
324 }
325 
HandleLoadResult(CPDF_Parser::Error error)326 CPDF_Parser::Error CPDF_Document::HandleLoadResult(CPDF_Parser::Error error) {
327   if (error == CPDF_Parser::SUCCESS)
328     m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
329   return error;
330 }
331 
GetPagesDict() const332 RetainPtr<const CPDF_Dictionary> CPDF_Document::GetPagesDict() const {
333   const CPDF_Dictionary* pRoot = GetRoot();
334   return pRoot ? pRoot->GetDictFor("Pages") : nullptr;
335 }
336 
GetMutablePagesDict()337 RetainPtr<CPDF_Dictionary> CPDF_Document::GetMutablePagesDict() {
338   return pdfium::WrapRetain(
339       const_cast<CPDF_Dictionary*>(this->GetPagesDict().Get()));
340 }
341 
IsPageLoaded(int iPage) const342 bool CPDF_Document::IsPageLoaded(int iPage) const {
343   return !!m_PageList[iPage];
344 }
345 
GetPageDictionary(int iPage)346 RetainPtr<const CPDF_Dictionary> CPDF_Document::GetPageDictionary(int iPage) {
347   if (!fxcrt::IndexInBounds(m_PageList, iPage))
348     return nullptr;
349 
350   const uint32_t objnum = m_PageList[iPage];
351   if (objnum) {
352     RetainPtr<CPDF_Dictionary> result =
353         ToDictionary(GetOrParseIndirectObject(objnum));
354     if (result)
355       return result;
356   }
357 
358   RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
359   if (!pPages)
360     return nullptr;
361 
362   if (m_pTreeTraversal.empty()) {
363     ResetTraversal();
364     m_pTreeTraversal.emplace_back(std::move(pPages), 0);
365   }
366   int nPagesToGo = iPage - m_iNextPageToTraverse + 1;
367   RetainPtr<CPDF_Dictionary> pPage = TraversePDFPages(iPage, &nPagesToGo, 0);
368   m_iNextPageToTraverse = iPage + 1;
369   return pPage;
370 }
371 
GetMutablePageDictionary(int iPage)372 RetainPtr<CPDF_Dictionary> CPDF_Document::GetMutablePageDictionary(int iPage) {
373   return pdfium::WrapRetain(
374       const_cast<CPDF_Dictionary*>(GetPageDictionary(iPage).Get()));
375 }
376 
SetPageObjNum(int iPage,uint32_t objNum)377 void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
378   m_PageList[iPage] = objNum;
379 }
380 
GetOrCreateCodecContext()381 JBig2_DocumentContext* CPDF_Document::GetOrCreateCodecContext() {
382   if (!m_pCodecContext)
383     m_pCodecContext = std::make_unique<JBig2_DocumentContext>();
384   return m_pCodecContext.get();
385 }
386 
CreateModifiedAPStream(RetainPtr<CPDF_Dictionary> dict)387 RetainPtr<CPDF_Stream> CPDF_Document::CreateModifiedAPStream(
388     RetainPtr<CPDF_Dictionary> dict) {
389   auto stream = NewIndirect<CPDF_Stream>(std::move(dict));
390   m_ModifiedAPStreamIDs.insert(stream->GetObjNum());
391   return stream;
392 }
393 
IsModifiedAPStream(const CPDF_Stream * stream) const394 bool CPDF_Document::IsModifiedAPStream(const CPDF_Stream* stream) const {
395   return stream && pdfium::Contains(m_ModifiedAPStreamIDs, stream->GetObjNum());
396 }
397 
GetPageIndex(uint32_t objnum)398 int CPDF_Document::GetPageIndex(uint32_t objnum) {
399   uint32_t skip_count = 0;
400   bool bSkipped = false;
401   for (uint32_t i = 0; i < m_PageList.size(); ++i) {
402     if (m_PageList[i] == objnum)
403       return i;
404 
405     if (!bSkipped && m_PageList[i] == 0) {
406       skip_count = i;
407       bSkipped = true;
408     }
409   }
410   RetainPtr<const CPDF_Dictionary> pPages = GetPagesDict();
411   if (!pPages)
412     return -1;
413 
414   int start_index = 0;
415   int found_index = FindPageIndex(pPages, &skip_count, objnum, &start_index, 0);
416 
417   // Corrupt page tree may yield out-of-range results.
418   if (!fxcrt::IndexInBounds(m_PageList, found_index))
419     return -1;
420 
421   // Only update |m_PageList| when |objnum| points to a /Page object.
422   if (IsValidPageObject(GetOrParseIndirectObject(objnum).Get()))
423     m_PageList[found_index] = objnum;
424   return found_index;
425 }
426 
GetPageCount() const427 int CPDF_Document::GetPageCount() const {
428   return fxcrt::CollectionSize<int>(m_PageList);
429 }
430 
RetrievePageCount()431 int CPDF_Document::RetrievePageCount() {
432   RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
433   if (!pPages)
434     return 0;
435 
436   if (!pPages->KeyExist("Kids"))
437     return 1;
438 
439   std::set<RetainPtr<CPDF_Dictionary>> visited_pages = {pPages};
440   return CountPages(std::move(pPages), &visited_pages).value_or(0);
441 }
442 
GetUserPermissions(bool get_owner_perms) const443 uint32_t CPDF_Document::GetUserPermissions(bool get_owner_perms) const {
444   return m_pParser ? m_pParser->GetPermissions(get_owner_perms) : 0;
445 }
446 
GetFontFileStreamAcc(RetainPtr<const CPDF_Stream> pFontStream)447 RetainPtr<CPDF_StreamAcc> CPDF_Document::GetFontFileStreamAcc(
448     RetainPtr<const CPDF_Stream> pFontStream) {
449   return m_pDocPage->GetFontFileStreamAcc(std::move(pFontStream));
450 }
451 
MaybePurgeFontFileStreamAcc(RetainPtr<CPDF_StreamAcc> && pStreamAcc)452 void CPDF_Document::MaybePurgeFontFileStreamAcc(
453     RetainPtr<CPDF_StreamAcc>&& pStreamAcc) {
454   m_pDocPage->MaybePurgeFontFileStreamAcc(std::move(pStreamAcc));
455 }
456 
MaybePurgeImage(uint32_t objnum)457 void CPDF_Document::MaybePurgeImage(uint32_t objnum) {
458   m_pDocPage->MaybePurgeImage(objnum);
459 }
460 
CreateNewDoc()461 void CPDF_Document::CreateNewDoc() {
462   DCHECK(!m_pRootDict);
463   DCHECK(!m_pInfoDict);
464   m_pRootDict = NewIndirect<CPDF_Dictionary>();
465   m_pRootDict->SetNewFor<CPDF_Name>("Type", "Catalog");
466 
467   auto pPages = NewIndirect<CPDF_Dictionary>();
468   pPages->SetNewFor<CPDF_Name>("Type", "Pages");
469   pPages->SetNewFor<CPDF_Number>("Count", 0);
470   pPages->SetNewFor<CPDF_Array>("Kids");
471   m_pRootDict->SetNewFor<CPDF_Reference>("Pages", this, pPages->GetObjNum());
472   m_pInfoDict = NewIndirect<CPDF_Dictionary>();
473 }
474 
CreateNewPage(int iPage)475 RetainPtr<CPDF_Dictionary> CPDF_Document::CreateNewPage(int iPage) {
476   auto pDict = NewIndirect<CPDF_Dictionary>();
477   pDict->SetNewFor<CPDF_Name>("Type", "Page");
478   uint32_t dwObjNum = pDict->GetObjNum();
479   if (!InsertNewPage(iPage, pDict)) {
480     DeleteIndirectObject(dwObjNum);
481     return nullptr;
482   }
483   return pDict;
484 }
485 
InsertDeletePDFPage(RetainPtr<CPDF_Dictionary> pages_dict,int pages_to_go,RetainPtr<CPDF_Dictionary> page_dict,bool is_insert,std::set<RetainPtr<CPDF_Dictionary>> * visited)486 bool CPDF_Document::InsertDeletePDFPage(
487     RetainPtr<CPDF_Dictionary> pages_dict,
488     int pages_to_go,
489     RetainPtr<CPDF_Dictionary> page_dict,
490     bool is_insert,
491     std::set<RetainPtr<CPDF_Dictionary>>* visited) {
492   RetainPtr<CPDF_Array> kids_list = pages_dict->GetMutableArrayFor("Kids");
493   if (!kids_list) {
494     return false;
495   }
496 
497   for (size_t i = 0; i < kids_list->size(); i++) {
498     RetainPtr<CPDF_Dictionary> kid_dict = kids_list->GetMutableDictAt(i);
499     NodeType kid_type = GetNodeType(kid_dict);
500     if (kid_type == NodeType::kLeaf) {
501       if (pages_to_go != 0) {
502         pages_to_go--;
503         continue;
504       }
505       if (is_insert) {
506         kids_list->InsertNewAt<CPDF_Reference>(i, this, page_dict->GetObjNum());
507         page_dict->SetNewFor<CPDF_Reference>("Parent", this,
508                                              pages_dict->GetObjNum());
509       } else {
510         kids_list->RemoveAt(i);
511       }
512       pages_dict->SetNewFor<CPDF_Number>(
513           "Count", pages_dict->GetIntegerFor("Count") + (is_insert ? 1 : -1));
514       ResetTraversal();
515       break;
516     }
517 
518     CHECK_EQ(kid_type, NodeType::kBranch);
519     int page_count = kid_dict->GetIntegerFor("Count");
520     if (pages_to_go >= page_count) {
521       pages_to_go -= page_count;
522       continue;
523     }
524     if (pdfium::Contains(*visited, kid_dict)) {
525       return false;
526     }
527 
528     ScopedSetInsertion<RetainPtr<CPDF_Dictionary>> insertion(visited, kid_dict);
529     if (!InsertDeletePDFPage(std::move(kid_dict), pages_to_go, page_dict,
530                              is_insert, visited)) {
531       return false;
532     }
533     pages_dict->SetNewFor<CPDF_Number>(
534         "Count", pages_dict->GetIntegerFor("Count") + (is_insert ? 1 : -1));
535     break;
536   }
537   return true;
538 }
539 
InsertNewPage(int iPage,RetainPtr<CPDF_Dictionary> pPageDict)540 bool CPDF_Document::InsertNewPage(int iPage,
541                                   RetainPtr<CPDF_Dictionary> pPageDict) {
542   RetainPtr<CPDF_Dictionary> pRoot = GetMutableRoot();
543   if (!pRoot)
544     return false;
545 
546   RetainPtr<CPDF_Dictionary> pPages = pRoot->GetMutableDictFor("Pages");
547   if (!pPages)
548     return false;
549 
550   int nPages = GetPageCount();
551   if (iPage < 0 || iPage > nPages)
552     return false;
553 
554   if (iPage == nPages) {
555     RetainPtr<CPDF_Array> pPagesList = pPages->GetOrCreateArrayFor("Kids");
556     pPagesList->AppendNew<CPDF_Reference>(this, pPageDict->GetObjNum());
557     pPages->SetNewFor<CPDF_Number>("Count", nPages + 1);
558     pPageDict->SetNewFor<CPDF_Reference>("Parent", this, pPages->GetObjNum());
559     ResetTraversal();
560   } else {
561     std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
562     if (!InsertDeletePDFPage(std::move(pPages), iPage, pPageDict, true, &stack))
563       return false;
564   }
565   m_PageList.insert(m_PageList.begin() + iPage, pPageDict->GetObjNum());
566   return true;
567 }
568 
GetInfo()569 RetainPtr<CPDF_Dictionary> CPDF_Document::GetInfo() {
570   if (m_pInfoDict)
571     return m_pInfoDict;
572 
573   if (!m_pParser)
574     return nullptr;
575 
576   uint32_t info_obj_num = m_pParser->GetInfoObjNum();
577   if (info_obj_num == 0)
578     return nullptr;
579 
580   auto ref = pdfium::MakeRetain<CPDF_Reference>(this, info_obj_num);
581   m_pInfoDict = ToDictionary(ref->GetMutableDirect());
582   return m_pInfoDict;
583 }
584 
GetFileIdentifier() const585 RetainPtr<const CPDF_Array> CPDF_Document::GetFileIdentifier() const {
586   return m_pParser ? m_pParser->GetIDArray() : nullptr;
587 }
588 
DeletePage(int iPage)589 uint32_t CPDF_Document::DeletePage(int iPage) {
590   RetainPtr<CPDF_Dictionary> pPages = GetMutablePagesDict();
591   if (!pPages) {
592     return 0;
593   }
594 
595   int nPages = pPages->GetIntegerFor("Count");
596   if (iPage < 0 || iPage >= nPages) {
597     return 0;
598   }
599 
600   RetainPtr<const CPDF_Dictionary> page_dict = GetPageDictionary(iPage);
601   if (!page_dict) {
602     return 0;
603   }
604 
605   std::set<RetainPtr<CPDF_Dictionary>> stack = {pPages};
606   if (!InsertDeletePDFPage(std::move(pPages), iPage, nullptr, false, &stack)) {
607     return 0;
608   }
609 
610   m_PageList.erase(m_PageList.begin() + iPage);
611   return page_dict->GetObjNum();
612 }
613 
SetPageToNullObject(uint32_t page_obj_num)614 void CPDF_Document::SetPageToNullObject(uint32_t page_obj_num) {
615   if (!page_obj_num || m_PageList.empty()) {
616     return;
617   }
618 
619   // Load all pages so `m_PageList` has all the object numbers.
620   for (size_t i = 0; i < m_PageList.size(); ++i) {
621     GetPageDictionary(i);
622   }
623 
624   if (pdfium::Contains(m_PageList, page_obj_num)) {
625     return;
626   }
627 
628   // If `page_dict` is no longer in the page tree, replace it with an object of
629   // type null.
630   //
631   // Delete the object first from this container, so the conditional in the
632   // replacement call always evaluates to true.
633   DeleteIndirectObject(page_obj_num);
634   const bool replaced = ReplaceIndirectObjectIfHigherGeneration(
635       page_obj_num, pdfium::MakeRetain<CPDF_Null>());
636   CHECK(replaced);
637 }
638 
SetRootForTesting(RetainPtr<CPDF_Dictionary> root)639 void CPDF_Document::SetRootForTesting(RetainPtr<CPDF_Dictionary> root) {
640   m_pRootDict = std::move(root);
641 }
642 
MovePages(pdfium::span<const int> page_indices,int dest_page_index)643 bool CPDF_Document::MovePages(pdfium::span<const int> page_indices,
644                               int dest_page_index) {
645   const CPDF_Dictionary* pages = GetPagesDict();
646   const int num_pages_signed = pages ? pages->GetIntegerFor("Count") : 0;
647   if (num_pages_signed <= 0) {
648     return false;
649   }
650   const size_t num_pages = num_pages_signed;
651 
652   // Check the number of pages is in range.
653   if (page_indices.empty() || page_indices.size() > num_pages) {
654     return false;
655   }
656 
657   // Check that destination page index is in range.
658   if (dest_page_index < 0 ||
659       static_cast<size_t>(dest_page_index) > num_pages - page_indices.size()) {
660     return false;
661   }
662 
663   // Check for if XFA is enabled.
664   Extension* extension = GetExtension();
665   if (extension && extension->ContainsExtensionForm()) {
666     // Don't manipulate XFA PDFs.
667     return false;
668   }
669 
670   // Check for duplicate and out-of-range page indices
671   std::set<int> unique_page_indices;
672   // Store the pages that need to be moved. They'll be deleted then reinserted.
673   std::vector<RetainPtr<CPDF_Dictionary>> pages_to_move;
674   pages_to_move.reserve(page_indices.size());
675   // Store the page indices that will be deleted (and moved).
676   std::vector<int> page_indices_to_delete;
677   page_indices_to_delete.reserve(page_indices.size());
678   for (const int page_index : page_indices) {
679     bool inserted = unique_page_indices.insert(page_index).second;
680     if (!inserted) {
681       // Duplicate page index found
682       return false;
683     }
684     RetainPtr<CPDF_Dictionary> page = GetMutablePageDictionary(page_index);
685     if (!page) {
686       // Page not found, index might be out of range.
687       return false;
688     }
689     pages_to_move.push_back(std::move(page));
690     page_indices_to_delete.push_back(page_index);
691   }
692 
693   // Sort the page indices to be deleted in descending order.
694   std::sort(page_indices_to_delete.begin(), page_indices_to_delete.end(),
695             std::greater<int>());
696   // Delete the pages in descending order.
697   if (extension) {
698     for (int page_index : page_indices_to_delete) {
699       extension->DeletePage(page_index);
700     }
701   } else {
702     for (int page_index : page_indices_to_delete) {
703       DeletePage(page_index);
704     }
705   }
706 
707   // Insert the deleted pages back into the document at the destination page
708   // index.
709   for (size_t i = 0; i < pages_to_move.size(); ++i) {
710     if (!InsertNewPage(i + dest_page_index, pages_to_move[i])) {
711       // Fail in an indeterminate state.
712       return false;
713     }
714   }
715 
716   return true;
717 }
718 
ResizePageListForTesting(size_t size)719 void CPDF_Document::ResizePageListForTesting(size_t size) {
720   m_PageList.resize(size);
721 }
722 
StockFontClearer(CPDF_Document::PageDataIface * pPageData)723 CPDF_Document::StockFontClearer::StockFontClearer(
724     CPDF_Document::PageDataIface* pPageData)
725     : m_pPageData(pPageData) {}
726 
~StockFontClearer()727 CPDF_Document::StockFontClearer::~StockFontClearer() {
728   m_pPageData->ClearStockFont();
729 }
730 
731 CPDF_Document::PageDataIface::PageDataIface() = default;
732 
733 CPDF_Document::PageDataIface::~PageDataIface() = default;
734 
735 CPDF_Document::RenderDataIface::RenderDataIface() = default;
736 
737 CPDF_Document::RenderDataIface::~RenderDataIface() = default;
738