1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_document.h"
8
9 #include <set>
10 #include <utility>
11 #include <vector>
12
13 #include "build/build_config.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
17 #include "core/fpdfapi/parser/cpdf_name.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_parser.h"
20 #include "core/fpdfapi/parser/cpdf_read_validator.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fxcodec/jbig2/JBig2_DocumentContext.h"
26 #include "core/fxcrt/fx_codepage.h"
27 #include "third_party/base/ptr_util.h"
28 #include "third_party/base/stl_util.h"
29
30 namespace {
31
32 const int kMaxPageLevel = 1024;
33
CountPages(CPDF_Dictionary * pPages,std::set<CPDF_Dictionary * > * visited_pages)34 int CountPages(CPDF_Dictionary* pPages,
35 std::set<CPDF_Dictionary*>* visited_pages) {
36 int count = pPages->GetIntegerFor("Count");
37 if (count > 0 && count < CPDF_Document::kPageMaxNum)
38 return count;
39 CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
40 if (!pKidList)
41 return 0;
42 count = 0;
43 for (size_t i = 0; i < pKidList->size(); i++) {
44 CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
45 if (!pKid || pdfium::ContainsKey(*visited_pages, pKid))
46 continue;
47 if (pKid->KeyExist("Kids")) {
48 // Use |visited_pages| to help detect circular references of pages.
49 pdfium::ScopedSetInsertion<CPDF_Dictionary*> local_add(visited_pages,
50 pKid);
51 count += CountPages(pKid, visited_pages);
52 } else {
53 // This page is a leaf node.
54 count++;
55 }
56 }
57 pPages->SetNewFor<CPDF_Number>("Count", count);
58 return count;
59 }
60
61 } // namespace
62
CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,std::unique_ptr<PageDataIface> pPageData)63 CPDF_Document::CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
64 std::unique_ptr<PageDataIface> pPageData)
65 : m_pDocRender(std::move(pRenderData)),
66 m_pDocPage(std::move(pPageData)),
67 m_StockFontClearer(m_pDocPage.get()) {
68 m_pDocRender->SetDocument(this);
69 m_pDocPage->SetDocument(this);
70 }
71
72 CPDF_Document::~CPDF_Document() = default;
73
ParseIndirectObject(uint32_t objnum)74 RetainPtr<CPDF_Object> CPDF_Document::ParseIndirectObject(uint32_t objnum) {
75 return m_pParser ? m_pParser->ParseIndirectObject(objnum) : nullptr;
76 }
77
TryInit()78 bool CPDF_Document::TryInit() {
79 SetLastObjNum(m_pParser->GetLastObjNum());
80
81 CPDF_Object* pRootObj = GetOrParseIndirectObject(m_pParser->GetRootObjNum());
82 if (pRootObj)
83 m_pRootDict.Reset(pRootObj->GetDict());
84
85 LoadPages();
86 return GetRoot() && GetPageCount() > 0;
87 }
88
LoadDoc(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,const char * password)89 CPDF_Parser::Error CPDF_Document::LoadDoc(
90 const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
91 const char* password) {
92 if (!m_pParser)
93 SetParser(pdfium::MakeUnique<CPDF_Parser>(this));
94
95 return HandleLoadResult(m_pParser->StartParse(pFileAccess, password));
96 }
97
LoadLinearizedDoc(const RetainPtr<CPDF_ReadValidator> & validator,const char * password)98 CPDF_Parser::Error CPDF_Document::LoadLinearizedDoc(
99 const RetainPtr<CPDF_ReadValidator>& validator,
100 const char* password) {
101 if (!m_pParser)
102 SetParser(pdfium::MakeUnique<CPDF_Parser>(this));
103
104 return HandleLoadResult(m_pParser->StartLinearizedParse(validator, password));
105 }
106
LoadPages()107 void CPDF_Document::LoadPages() {
108 const CPDF_LinearizedHeader* linearized_header =
109 m_pParser->GetLinearizedHeader();
110 if (!linearized_header) {
111 m_PageList.resize(RetrievePageCount());
112 return;
113 }
114
115 m_PageList.resize(linearized_header->GetPageCount());
116 ASSERT(linearized_header->GetFirstPageNo() < m_PageList.size());
117 m_PageList[linearized_header->GetFirstPageNo()] =
118 linearized_header->GetFirstPageObjNum();
119 }
120
TraversePDFPages(int iPage,int * nPagesToGo,size_t level)121 CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage,
122 int* nPagesToGo,
123 size_t level) {
124 if (*nPagesToGo < 0 || m_bReachedMaxPageLevel)
125 return nullptr;
126
127 CPDF_Dictionary* pPages = m_pTreeTraversal[level].first;
128 CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
129 if (!pKidList) {
130 m_pTreeTraversal.pop_back();
131 if (*nPagesToGo != 1)
132 return nullptr;
133 m_PageList[iPage] = pPages->GetObjNum();
134 return pPages;
135 }
136 if (level >= kMaxPageLevel) {
137 m_pTreeTraversal.pop_back();
138 m_bReachedMaxPageLevel = true;
139 return nullptr;
140 }
141 CPDF_Dictionary* page = nullptr;
142 for (size_t i = m_pTreeTraversal[level].second; i < pKidList->size(); i++) {
143 if (*nPagesToGo == 0)
144 break;
145 pKidList->ConvertToIndirectObjectAt(i, this);
146 CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
147 if (!pKid) {
148 (*nPagesToGo)--;
149 m_pTreeTraversal[level].second++;
150 continue;
151 }
152 if (pKid == pPages) {
153 m_pTreeTraversal[level].second++;
154 continue;
155 }
156 if (!pKid->KeyExist("Kids")) {
157 m_PageList[iPage - (*nPagesToGo) + 1] = pKid->GetObjNum();
158 (*nPagesToGo)--;
159 m_pTreeTraversal[level].second++;
160 if (*nPagesToGo == 0) {
161 page = pKid;
162 break;
163 }
164 } else {
165 // If the vector has size level+1, the child is not in yet
166 if (m_pTreeTraversal.size() == level + 1)
167 m_pTreeTraversal.push_back(std::make_pair(pKid, 0));
168 // Now m_pTreeTraversal[level+1] should exist and be equal to pKid.
169 CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo, level + 1);
170 // Check if child was completely processed, i.e. it popped itself out
171 if (m_pTreeTraversal.size() == level + 1)
172 m_pTreeTraversal[level].second++;
173 // If child did not finish, no pages to go, or max level reached, end
174 if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0 ||
175 m_bReachedMaxPageLevel) {
176 page = pageKid;
177 break;
178 }
179 }
180 }
181 if (m_pTreeTraversal[level].second == pKidList->size())
182 m_pTreeTraversal.pop_back();
183 return page;
184 }
185
ResetTraversal()186 void CPDF_Document::ResetTraversal() {
187 m_iNextPageToTraverse = 0;
188 m_bReachedMaxPageLevel = false;
189 m_pTreeTraversal.clear();
190 }
191
SetParser(std::unique_ptr<CPDF_Parser> pParser)192 void CPDF_Document::SetParser(std::unique_ptr<CPDF_Parser> pParser) {
193 ASSERT(!m_pParser);
194 m_pParser = std::move(pParser);
195 }
196
HandleLoadResult(CPDF_Parser::Error error)197 CPDF_Parser::Error CPDF_Document::HandleLoadResult(CPDF_Parser::Error error) {
198 if (error == CPDF_Parser::SUCCESS)
199 m_bHasValidCrossReferenceTable = !m_pParser->xref_table_rebuilt();
200 return error;
201 }
202
GetPagesDict() const203 const CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
204 const CPDF_Dictionary* pRoot = GetRoot();
205 return pRoot ? pRoot->GetDictFor("Pages") : nullptr;
206 }
207
GetPagesDict()208 CPDF_Dictionary* CPDF_Document::GetPagesDict() {
209 return const_cast<CPDF_Dictionary*>(
210 static_cast<const CPDF_Document*>(this)->GetPagesDict());
211 }
212
IsPageLoaded(int iPage) const213 bool CPDF_Document::IsPageLoaded(int iPage) const {
214 return !!m_PageList[iPage];
215 }
216
GetPageDictionary(int iPage)217 CPDF_Dictionary* CPDF_Document::GetPageDictionary(int iPage) {
218 if (!pdfium::IndexInBounds(m_PageList, iPage))
219 return nullptr;
220
221 const uint32_t objnum = m_PageList[iPage];
222 if (objnum) {
223 CPDF_Dictionary* result = ToDictionary(GetOrParseIndirectObject(objnum));
224 if (result)
225 return result;
226 }
227
228 CPDF_Dictionary* pPages = GetPagesDict();
229 if (!pPages)
230 return nullptr;
231
232 if (m_pTreeTraversal.empty()) {
233 ResetTraversal();
234 m_pTreeTraversal.push_back(std::make_pair(pPages, 0));
235 }
236 int nPagesToGo = iPage - m_iNextPageToTraverse + 1;
237 CPDF_Dictionary* pPage = TraversePDFPages(iPage, &nPagesToGo, 0);
238 m_iNextPageToTraverse = iPage + 1;
239 return pPage;
240 }
241
SetPageObjNum(int iPage,uint32_t objNum)242 void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
243 m_PageList[iPage] = objNum;
244 }
245
FindPageIndex(const CPDF_Dictionary * pNode,uint32_t * skip_count,uint32_t objnum,int * index,int level) const246 int CPDF_Document::FindPageIndex(const CPDF_Dictionary* pNode,
247 uint32_t* skip_count,
248 uint32_t objnum,
249 int* index,
250 int level) const {
251 if (!pNode->KeyExist("Kids")) {
252 if (objnum == pNode->GetObjNum())
253 return *index;
254
255 if (*skip_count)
256 (*skip_count)--;
257
258 (*index)++;
259 return -1;
260 }
261
262 const CPDF_Array* pKidList = pNode->GetArrayFor("Kids");
263 if (!pKidList)
264 return -1;
265
266 if (level >= kMaxPageLevel)
267 return -1;
268
269 size_t count = pNode->GetIntegerFor("Count");
270 if (count <= *skip_count) {
271 (*skip_count) -= count;
272 (*index) += count;
273 return -1;
274 }
275
276 if (count && count == pKidList->size()) {
277 for (size_t i = 0; i < count; i++) {
278 const CPDF_Reference* pKid = ToReference(pKidList->GetObjectAt(i));
279 if (pKid && pKid->GetRefObjNum() == objnum)
280 return static_cast<int>(*index + i);
281 }
282 }
283
284 for (size_t i = 0; i < pKidList->size(); i++) {
285 const CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
286 if (!pKid || pKid == pNode)
287 continue;
288
289 int found_index = FindPageIndex(pKid, skip_count, objnum, index, level + 1);
290 if (found_index >= 0)
291 return found_index;
292 }
293 return -1;
294 }
295
GetPageIndex(uint32_t objnum)296 int CPDF_Document::GetPageIndex(uint32_t objnum) {
297 uint32_t nPages = m_PageList.size();
298 uint32_t skip_count = 0;
299 bool bSkipped = false;
300 for (uint32_t i = 0; i < nPages; i++) {
301 if (m_PageList[i] == objnum)
302 return i;
303
304 if (!bSkipped && m_PageList[i] == 0) {
305 skip_count = i;
306 bSkipped = true;
307 }
308 }
309 const CPDF_Dictionary* pPages = GetPagesDict();
310 if (!pPages)
311 return -1;
312
313 int start_index = 0;
314 int found_index = FindPageIndex(pPages, &skip_count, objnum, &start_index, 0);
315
316 // Corrupt page tree may yield out-of-range results.
317 if (!pdfium::IndexInBounds(m_PageList, found_index))
318 return -1;
319
320 m_PageList[found_index] = objnum;
321 return found_index;
322 }
323
GetPageCount() const324 int CPDF_Document::GetPageCount() const {
325 return pdfium::CollectionSize<int>(m_PageList);
326 }
327
RetrievePageCount()328 int CPDF_Document::RetrievePageCount() {
329 CPDF_Dictionary* pPages = GetPagesDict();
330 if (!pPages)
331 return 0;
332
333 if (!pPages->KeyExist("Kids"))
334 return 1;
335
336 std::set<CPDF_Dictionary*> visited_pages;
337 visited_pages.insert(pPages);
338 return CountPages(pPages, &visited_pages);
339 }
340
GetUserPermissions() const341 uint32_t CPDF_Document::GetUserPermissions() const {
342 if (m_pParser)
343 return m_pParser->GetPermissions();
344
345 return m_pExtension ? m_pExtension->GetUserPermissions() : 0;
346 }
347
CreateNewDoc()348 void CPDF_Document::CreateNewDoc() {
349 ASSERT(!m_pRootDict);
350 ASSERT(!m_pInfoDict);
351 m_pRootDict.Reset(NewIndirect<CPDF_Dictionary>());
352 m_pRootDict->SetNewFor<CPDF_Name>("Type", "Catalog");
353
354 CPDF_Dictionary* pPages = NewIndirect<CPDF_Dictionary>();
355 pPages->SetNewFor<CPDF_Name>("Type", "Pages");
356 pPages->SetNewFor<CPDF_Number>("Count", 0);
357 pPages->SetNewFor<CPDF_Array>("Kids");
358 m_pRootDict->SetNewFor<CPDF_Reference>("Pages", this, pPages->GetObjNum());
359 m_pInfoDict.Reset(NewIndirect<CPDF_Dictionary>());
360 }
361
CreateNewPage(int iPage)362 CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) {
363 CPDF_Dictionary* pDict = NewIndirect<CPDF_Dictionary>();
364 pDict->SetNewFor<CPDF_Name>("Type", "Page");
365 uint32_t dwObjNum = pDict->GetObjNum();
366 if (!InsertNewPage(iPage, pDict)) {
367 DeleteIndirectObject(dwObjNum);
368 return nullptr;
369 }
370 return pDict;
371 }
372
InsertDeletePDFPage(CPDF_Dictionary * pPages,int nPagesToGo,CPDF_Dictionary * pPageDict,bool bInsert,std::set<CPDF_Dictionary * > * pVisited)373 bool CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,
374 int nPagesToGo,
375 CPDF_Dictionary* pPageDict,
376 bool bInsert,
377 std::set<CPDF_Dictionary*>* pVisited) {
378 CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
379 if (!pKidList)
380 return false;
381
382 for (size_t i = 0; i < pKidList->size(); i++) {
383 CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
384 if (pKid->GetStringFor("Type") == "Page") {
385 if (nPagesToGo != 0) {
386 nPagesToGo--;
387 continue;
388 }
389 if (bInsert) {
390 pKidList->InsertNewAt<CPDF_Reference>(i, this, pPageDict->GetObjNum());
391 pPageDict->SetNewFor<CPDF_Reference>("Parent", this,
392 pPages->GetObjNum());
393 } else {
394 pKidList->RemoveAt(i);
395 }
396 pPages->SetNewFor<CPDF_Number>(
397 "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
398 ResetTraversal();
399 break;
400 }
401 int nPages = pKid->GetIntegerFor("Count");
402 if (nPagesToGo >= nPages) {
403 nPagesToGo -= nPages;
404 continue;
405 }
406 if (pdfium::ContainsKey(*pVisited, pKid))
407 return false;
408
409 pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
410 if (!InsertDeletePDFPage(pKid, nPagesToGo, pPageDict, bInsert, pVisited))
411 return false;
412
413 pPages->SetNewFor<CPDF_Number>(
414 "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
415 break;
416 }
417 return true;
418 }
419
InsertNewPage(int iPage,CPDF_Dictionary * pPageDict)420 bool CPDF_Document::InsertNewPage(int iPage, CPDF_Dictionary* pPageDict) {
421 CPDF_Dictionary* pRoot = GetRoot();
422 CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
423 if (!pPages)
424 return false;
425
426 int nPages = GetPageCount();
427 if (iPage < 0 || iPage > nPages)
428 return false;
429
430 if (iPage == nPages) {
431 CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
432 if (!pPagesList)
433 pPagesList = pPages->SetNewFor<CPDF_Array>("Kids");
434 pPagesList->AddNew<CPDF_Reference>(this, pPageDict->GetObjNum());
435 pPages->SetNewFor<CPDF_Number>("Count", nPages + 1);
436 pPageDict->SetNewFor<CPDF_Reference>("Parent", this, pPages->GetObjNum());
437 ResetTraversal();
438 } else {
439 std::set<CPDF_Dictionary*> stack = {pPages};
440 if (!InsertDeletePDFPage(pPages, iPage, pPageDict, true, &stack))
441 return false;
442 }
443 m_PageList.insert(m_PageList.begin() + iPage, pPageDict->GetObjNum());
444 return true;
445 }
446
GetInfo()447 CPDF_Dictionary* CPDF_Document::GetInfo() {
448 if (m_pInfoDict)
449 return m_pInfoDict.Get();
450
451 if (!m_pParser || !m_pParser->GetInfoObjNum())
452 return nullptr;
453
454 auto ref =
455 pdfium::MakeRetain<CPDF_Reference>(this, m_pParser->GetInfoObjNum());
456 m_pInfoDict.Reset(ToDictionary(ref->GetDirect()));
457 return m_pInfoDict.Get();
458 }
459
DeletePage(int iPage)460 void CPDF_Document::DeletePage(int iPage) {
461 CPDF_Dictionary* pPages = GetPagesDict();
462 if (!pPages)
463 return;
464
465 int nPages = pPages->GetIntegerFor("Count");
466 if (iPage < 0 || iPage >= nPages)
467 return;
468
469 std::set<CPDF_Dictionary*> stack = {pPages};
470 if (!InsertDeletePDFPage(pPages, iPage, nullptr, false, &stack))
471 return;
472
473 m_PageList.erase(m_PageList.begin() + iPage);
474 }
475
StockFontClearer(CPDF_Document::PageDataIface * pPageData)476 CPDF_Document::StockFontClearer::StockFontClearer(
477 CPDF_Document::PageDataIface* pPageData)
478 : m_pPageData(pPageData) {}
479
~StockFontClearer()480 CPDF_Document::StockFontClearer::~StockFontClearer() {
481 m_pPageData->ClearStockFont();
482 }
483
484 CPDF_Document::PageDataIface::PageDataIface() = default;
485
486 CPDF_Document::PageDataIface::~PageDataIface() = default;
487
488 CPDF_Document::RenderDataIface::RenderDataIface() = default;
489
490 CPDF_Document::RenderDataIface::~RenderDataIface() = default;
491