1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_safe_types.h"
29 #include "third_party/base/compiler_specific.h"
30 #include "third_party/base/numerics/safe_conversions.h"
31 #include "third_party/base/ptr_util.h"
32 #include "third_party/base/stl_util.h"
33
34 namespace {
35
36 // static
GetResourceObject(CPDF_Dictionary * pDict)37 CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) {
38 constexpr size_t kMaxHierarchyDepth = 64;
39 size_t depth = 0;
40
41 CPDF_Dictionary* dictionary_to_check = pDict;
42 while (dictionary_to_check) {
43 CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
44 if (result)
45 return result;
46 CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
47 dictionary_to_check = parent ? parent->GetDict() : nullptr;
48
49 if (++depth > kMaxHierarchyDepth) {
50 // We have cycle in parents hierarchy.
51 return nullptr;
52 }
53 }
54 return nullptr;
55 }
56
57 class HintsScope {
58 public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)59 HintsScope(RetainPtr<CPDF_ReadValidator> validator,
60 CPDF_DataAvail::DownloadHints* hints)
61 : validator_(std::move(validator)) {
62 ASSERT(validator_);
63 validator_->SetDownloadHints(hints);
64 }
65
~HintsScope()66 ~HintsScope() { validator_->SetDownloadHints(nullptr); }
67
68 private:
69 RetainPtr<CPDF_ReadValidator> validator_;
70 };
71
72 } // namespace
73
~FileAvail()74 CPDF_DataAvail::FileAvail::~FileAvail() {}
75
~DownloadHints()76 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
77
CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78 CPDF_DataAvail::CPDF_DataAvail(
79 FileAvail* pFileAvail,
80 const RetainPtr<IFX_SeekableReadStream>& pFileRead,
81 bool bSupportHintTable)
82 : m_pFileRead(
83 pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, pFileAvail)),
84 m_dwFileLen(m_pFileRead->GetSize()),
85 m_bSupportHintTable(bSupportHintTable) {}
86
~CPDF_DataAvail()87 CPDF_DataAvail::~CPDF_DataAvail() {
88 m_pHintTables.reset();
89 if (m_pDocument)
90 m_pDocument->RemoveObserver(this);
91 }
92
OnObservableDestroyed()93 void CPDF_DataAvail::OnObservableDestroyed() {
94 m_pDocument = nullptr;
95 m_pFormAvail.reset();
96 m_PagesArray.clear();
97 m_PagesObjAvail.clear();
98 m_PagesResourcesAvail.clear();
99 }
100
IsDocAvail(DownloadHints * pHints)101 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
102 DownloadHints* pHints) {
103 if (!m_dwFileLen)
104 return DataError;
105
106 const HintsScope hints_scope(GetValidator(), pHints);
107 while (!m_bDocAvail) {
108 if (!CheckDocStatus())
109 return DataNotAvailable;
110 }
111
112 return DataAvailable;
113 }
114
CheckDocStatus()115 bool CPDF_DataAvail::CheckDocStatus() {
116 switch (m_docStatus) {
117 case PDF_DATAAVAIL_HEADER:
118 return CheckHeader();
119 case PDF_DATAAVAIL_FIRSTPAGE:
120 return CheckFirstPage();
121 case PDF_DATAAVAIL_HINTTABLE:
122 return CheckHintTables();
123 case PDF_DATAAVAIL_LOADALLCROSSREF:
124 return CheckAndLoadAllXref();
125 case PDF_DATAAVAIL_LOADALLFILE:
126 return LoadAllFile();
127 case PDF_DATAAVAIL_ROOT:
128 return CheckRoot();
129 case PDF_DATAAVAIL_INFO:
130 return CheckInfo();
131 case PDF_DATAAVAIL_PAGETREE:
132 if (m_bTotalLoadPageTree)
133 return CheckPages();
134 return LoadDocPages();
135 case PDF_DATAAVAIL_PAGE:
136 if (m_bTotalLoadPageTree)
137 return CheckPage();
138 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
139 return true;
140 case PDF_DATAAVAIL_ERROR:
141 return LoadAllFile();
142 case PDF_DATAAVAIL_PAGE_LATERLOAD:
143 m_docStatus = PDF_DATAAVAIL_PAGE;
144 FALLTHROUGH;
145 default:
146 m_bDocAvail = true;
147 return true;
148 }
149 }
150
CheckPageStatus()151 bool CPDF_DataAvail::CheckPageStatus() {
152 switch (m_docStatus) {
153 case PDF_DATAAVAIL_PAGETREE:
154 return CheckPages();
155 case PDF_DATAAVAIL_PAGE:
156 return CheckPage();
157 case PDF_DATAAVAIL_ERROR:
158 return LoadAllFile();
159 default:
160 m_bPagesTreeLoad = true;
161 m_bPagesLoad = true;
162 return true;
163 }
164 }
165
LoadAllFile()166 bool CPDF_DataAvail::LoadAllFile() {
167 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
168 m_docStatus = PDF_DATAAVAIL_DONE;
169 return true;
170 }
171 return false;
172 }
173
CheckAndLoadAllXref()174 bool CPDF_DataAvail::CheckAndLoadAllXref() {
175 if (!m_pCrossRefAvail) {
176 const CPDF_ReadValidator::Session read_session(GetValidator());
177 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
178 if (GetValidator()->has_read_problems())
179 return false;
180
181 if (last_xref_offset <= 0) {
182 m_docStatus = PDF_DATAAVAIL_ERROR;
183 return false;
184 }
185
186 m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
187 last_xref_offset);
188 }
189
190 switch (m_pCrossRefAvail->CheckAvail()) {
191 case DocAvailStatus::DataAvailable:
192 break;
193 case DocAvailStatus::DataNotAvailable:
194 return false;
195 case DocAvailStatus::DataError:
196 m_docStatus = PDF_DATAAVAIL_ERROR;
197 return false;
198 default:
199 NOTREACHED();
200 return false;
201 }
202
203 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
204 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
205 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
206 return false;
207 }
208
209 m_docStatus = PDF_DATAAVAIL_ROOT;
210 return true;
211 }
212
GetObject(uint32_t objnum,bool * pExistInFile)213 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
214 bool* pExistInFile) {
215 CPDF_Parser* pParser = nullptr;
216
217 if (pExistInFile)
218 *pExistInFile = true;
219
220 pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
221
222 RetainPtr<CPDF_Object> pRet;
223 if (pParser) {
224 const CPDF_ReadValidator::Session read_session(GetValidator());
225 pRet = pParser->ParseIndirectObject(objnum);
226 if (GetValidator()->has_read_problems())
227 return nullptr;
228 }
229
230 if (!pRet && pExistInFile)
231 *pExistInFile = false;
232
233 return pRet;
234 }
235
CheckInfo()236 bool CPDF_DataAvail::CheckInfo() {
237 const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
238 if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
239 m_docStatus = PDF_DATAAVAIL_PAGETREE;
240 return true;
241 }
242
243 const CPDF_ReadValidator::Session read_session(GetValidator());
244 m_parser.ParseIndirectObject(dwInfoObjNum);
245 if (GetValidator()->has_read_problems())
246 return false;
247
248 m_docStatus = PDF_DATAAVAIL_PAGETREE;
249 return true;
250 }
251
CheckRoot()252 bool CPDF_DataAvail::CheckRoot() {
253 const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
254 if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
255 m_docStatus = PDF_DATAAVAIL_ERROR;
256 return true;
257 }
258
259 const CPDF_ReadValidator::Session read_session(GetValidator());
260 m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
261 if (GetValidator()->has_read_problems())
262 return false;
263
264 const CPDF_Reference* pRef =
265 ToReference(m_pRoot ? m_pRoot->GetObjectFor("Pages") : nullptr);
266 if (!pRef) {
267 m_docStatus = PDF_DATAAVAIL_ERROR;
268 return false;
269 }
270
271 m_PagesObjNum = pRef->GetRefObjNum();
272 m_docStatus = PDF_DATAAVAIL_INFO;
273 return true;
274 }
275
PreparePageItem()276 bool CPDF_DataAvail::PreparePageItem() {
277 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
278 const CPDF_Reference* pRef =
279 ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
280 if (!pRef) {
281 m_docStatus = PDF_DATAAVAIL_ERROR;
282 return false;
283 }
284
285 m_PagesObjNum = pRef->GetRefObjNum();
286 m_docStatus = PDF_DATAAVAIL_PAGETREE;
287 return true;
288 }
289
IsFirstCheck(uint32_t dwPage)290 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
291 return m_pageMapCheckState.insert(dwPage).second;
292 }
293
ResetFirstCheck(uint32_t dwPage)294 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
295 m_pageMapCheckState.erase(dwPage);
296 }
297
CheckPage()298 bool CPDF_DataAvail::CheckPage() {
299 std::vector<uint32_t> UnavailObjList;
300 for (uint32_t dwPageObjNum : m_PageObjList) {
301 bool bExists = false;
302 RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
303 if (!pObj) {
304 if (bExists)
305 UnavailObjList.push_back(dwPageObjNum);
306 continue;
307 }
308 CPDF_Array* pArray = ToArray(pObj.Get());
309 if (pArray) {
310 CPDF_ArrayLocker locker(pArray);
311 for (const auto& pArrayObj : locker) {
312 if (CPDF_Reference* pRef = ToReference(pArrayObj.Get()))
313 UnavailObjList.push_back(pRef->GetRefObjNum());
314 }
315 }
316 if (!pObj->IsDictionary())
317 continue;
318
319 ByteString type = pObj->GetDict()->GetStringFor("Type");
320 if (type == "Pages") {
321 m_PagesArray.push_back(std::move(pObj));
322 continue;
323 }
324 }
325 m_PageObjList.clear();
326 if (!UnavailObjList.empty()) {
327 m_PageObjList = std::move(UnavailObjList);
328 return false;
329 }
330 size_t iPages = m_PagesArray.size();
331 for (size_t i = 0; i < iPages; ++i) {
332 RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
333 if (pPages && !GetPageKids(pPages.Get())) {
334 m_PagesArray.clear();
335 m_docStatus = PDF_DATAAVAIL_ERROR;
336 return false;
337 }
338 }
339 m_PagesArray.clear();
340 if (m_PageObjList.empty())
341 m_docStatus = PDF_DATAAVAIL_DONE;
342
343 return true;
344 }
345
GetPageKids(CPDF_Object * pPages)346 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
347 CPDF_Dictionary* pDict = pPages->GetDict();
348 CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
349 if (!pKids)
350 return true;
351
352 switch (pKids->GetType()) {
353 case CPDF_Object::kReference:
354 m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
355 break;
356 case CPDF_Object::kArray: {
357 CPDF_Array* pKidsArray = pKids->AsArray();
358 for (size_t i = 0; i < pKidsArray->size(); ++i) {
359 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
360 m_PageObjList.push_back(pRef->GetRefObjNum());
361 }
362 break;
363 }
364 default:
365 m_docStatus = PDF_DATAAVAIL_ERROR;
366 return false;
367 }
368 return true;
369 }
370
CheckPages()371 bool CPDF_DataAvail::CheckPages() {
372 bool bExists = false;
373 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
374 if (!bExists) {
375 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
376 return true;
377 }
378
379 if (!pPages) {
380 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
381 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
382 return true;
383 }
384 return false;
385 }
386
387 if (!GetPageKids(pPages.Get())) {
388 m_docStatus = PDF_DATAAVAIL_ERROR;
389 return false;
390 }
391
392 m_docStatus = PDF_DATAAVAIL_PAGE;
393 return true;
394 }
395
CheckHeader()396 bool CPDF_DataAvail::CheckHeader() {
397 switch (CheckHeaderAndLinearized()) {
398 case DocAvailStatus::DataAvailable:
399 m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
400 : PDF_DATAAVAIL_LOADALLCROSSREF;
401 return true;
402 case DocAvailStatus::DataNotAvailable:
403 return false;
404 case DocAvailStatus::DataError:
405 m_docStatus = PDF_DATAAVAIL_ERROR;
406 return true;
407 default:
408 NOTREACHED();
409 return false;
410 }
411 }
412
CheckFirstPage()413 bool CPDF_DataAvail::CheckFirstPage() {
414 if (!m_pLinearized->GetFirstPageEndOffset() ||
415 !m_pLinearized->GetFileSize() ||
416 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
417 m_docStatus = PDF_DATAAVAIL_ERROR;
418 return false;
419 }
420
421 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
422 dwEnd += 512;
423 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
424 dwEnd = (uint32_t)m_dwFileLen;
425
426 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
427 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
428 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
429 data_size))
430 return false;
431
432 m_docStatus =
433 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
434 return true;
435 }
436
CheckHintTables()437 bool CPDF_DataAvail::CheckHintTables() {
438 const CPDF_ReadValidator::Session read_session(GetValidator());
439 m_pHintTables =
440 CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
441
442 if (GetValidator()->read_error()) {
443 m_docStatus = PDF_DATAAVAIL_ERROR;
444 return true;
445 }
446 if (GetValidator()->has_unavailable_data())
447 return false;
448
449 m_docStatus = PDF_DATAAVAIL_DONE;
450 return true;
451 }
452
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const453 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
454 FX_FILESIZE pos,
455 uint32_t objnum,
456 CPDF_IndirectObjectHolder* pObjList) const {
457 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
458 GetSyntaxParser()->SetPos(pos);
459 RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
460 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
461 GetSyntaxParser()->SetPos(SavedPos);
462 return (result && (!objnum || result->GetObjNum() == objnum))
463 ? std::move(result)
464 : nullptr;
465 }
466
IsLinearizedPDF()467 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
468 switch (CheckHeaderAndLinearized()) {
469 case DocAvailStatus::DataAvailable:
470 return m_pLinearized ? DocLinearizationStatus::Linearized
471 : DocLinearizationStatus::NotLinearized;
472 case DocAvailStatus::DataNotAvailable:
473 return DocLinearizationStatus::LinearizationUnknown;
474 case DocAvailStatus::DataError:
475 return DocLinearizationStatus::NotLinearized;
476 default:
477 NOTREACHED();
478 return DocLinearizationStatus::LinearizationUnknown;
479 }
480 }
481
CheckHeaderAndLinearized()482 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
483 if (m_bHeaderAvail)
484 return DocAvailStatus::DataAvailable;
485
486 const CPDF_ReadValidator::Session read_session(GetValidator());
487 const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(GetValidator());
488 if (GetValidator()->has_read_problems())
489 return DocAvailStatus::DataNotAvailable;
490
491 if (!header_offset)
492 return DocAvailStatus::DataError;
493
494 m_parser.m_pSyntax =
495 pdfium::MakeUnique<CPDF_SyntaxParser>(GetValidator(), *header_offset);
496 m_pLinearized = m_parser.ParseLinearizedHeader();
497 if (GetValidator()->has_read_problems())
498 return DocAvailStatus::DataNotAvailable;
499
500 m_bHeaderAvail = true;
501 return DocAvailStatus::DataAvailable;
502 }
503
CheckPage(uint32_t dwPage)504 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
505 while (true) {
506 switch (m_docStatus) {
507 case PDF_DATAAVAIL_PAGETREE:
508 if (!LoadDocPages())
509 return false;
510 break;
511 case PDF_DATAAVAIL_PAGE:
512 if (!LoadDocPage(dwPage))
513 return false;
514 break;
515 case PDF_DATAAVAIL_ERROR:
516 return LoadAllFile();
517 default:
518 m_bPagesTreeLoad = true;
519 m_bPagesLoad = true;
520 m_bCurPageDictLoadOK = true;
521 m_docStatus = PDF_DATAAVAIL_PAGE;
522 return true;
523 }
524 }
525 }
526
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)527 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
528 PageNode* pPageNode) {
529 bool bExists = false;
530 RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
531 if (!bExists) {
532 m_docStatus = PDF_DATAAVAIL_ERROR;
533 return false;
534 }
535
536 if (!pPages)
537 return false;
538
539 CPDF_Array* pArray = pPages->AsArray();
540 if (!pArray) {
541 m_docStatus = PDF_DATAAVAIL_ERROR;
542 return false;
543 }
544
545 pPageNode->m_type = PDF_PAGENODE_PAGES;
546 for (size_t i = 0; i < pArray->size(); ++i) {
547 CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
548 if (!pKid)
549 continue;
550
551 auto pNode = pdfium::MakeUnique<PageNode>();
552 pNode->m_dwPageNo = pKid->GetRefObjNum();
553 pPageNode->m_ChildNodes.push_back(std::move(pNode));
554 }
555 return true;
556 }
557
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)558 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
559 PageNode* pPageNode) {
560 bool bExists = false;
561 RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
562 if (!bExists) {
563 m_docStatus = PDF_DATAAVAIL_ERROR;
564 return false;
565 }
566
567 if (!pPage)
568 return false;
569
570 if (pPage->IsArray()) {
571 pPageNode->m_dwPageNo = dwPageNo;
572 pPageNode->m_type = PDF_PAGENODE_ARRAY;
573 return true;
574 }
575
576 if (!pPage->IsDictionary()) {
577 m_docStatus = PDF_DATAAVAIL_ERROR;
578 return false;
579 }
580
581 pPageNode->m_dwPageNo = dwPageNo;
582 CPDF_Dictionary* pDict = pPage->GetDict();
583 const ByteString type = pDict->GetStringFor("Type");
584 if (type == "Page") {
585 pPageNode->m_type = PDF_PAGENODE_PAGE;
586 return true;
587 }
588
589 if (type != "Pages") {
590 m_docStatus = PDF_DATAAVAIL_ERROR;
591 return false;
592 }
593
594 pPageNode->m_type = PDF_PAGENODE_PAGES;
595 CPDF_Object* pKids = pDict->GetObjectFor("Kids");
596 if (!pKids) {
597 m_docStatus = PDF_DATAAVAIL_PAGE;
598 return true;
599 }
600
601 switch (pKids->GetType()) {
602 case CPDF_Object::kReference: {
603 CPDF_Reference* pKid = pKids->AsReference();
604 auto pNode = pdfium::MakeUnique<PageNode>();
605 pNode->m_dwPageNo = pKid->GetRefObjNum();
606 pPageNode->m_ChildNodes.push_back(std::move(pNode));
607 break;
608 }
609 case CPDF_Object::kArray: {
610 CPDF_Array* pKidsArray = pKids->AsArray();
611 for (size_t i = 0; i < pKidsArray->size(); ++i) {
612 CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
613 if (!pKid)
614 continue;
615
616 auto pNode = pdfium::MakeUnique<PageNode>();
617 pNode->m_dwPageNo = pKid->GetRefObjNum();
618 pPageNode->m_ChildNodes.push_back(std::move(pNode));
619 }
620 break;
621 }
622 default:
623 break;
624 }
625 return true;
626 }
627
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)628 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
629 int32_t iPage,
630 int32_t& iCount,
631 int level) {
632 if (level >= kMaxPageRecursionDepth)
633 return false;
634
635 int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
636 if (iSize <= 0 || iPage >= iSize) {
637 m_docStatus = PDF_DATAAVAIL_ERROR;
638 return false;
639 }
640 for (int32_t i = 0; i < iSize; ++i) {
641 PageNode* pNode = pageNode.m_ChildNodes[i].get();
642 if (!pNode)
643 continue;
644
645 if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
646 // Updates the type for the unknown page node.
647 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
648 return false;
649 }
650 if (pNode->m_type == PDF_PAGENODE_ARRAY) {
651 // Updates a more specific type for the array page node.
652 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
653 return false;
654 }
655 switch (pNode->m_type) {
656 case PDF_PAGENODE_PAGE:
657 iCount++;
658 if (iPage == iCount && m_pDocument)
659 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
660 break;
661 case PDF_PAGENODE_PAGES:
662 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
663 return false;
664 break;
665 case PDF_PAGENODE_UNKNOWN:
666 case PDF_PAGENODE_ARRAY:
667 // Already converted above, error if we get here.
668 return false;
669 }
670 if (iPage == iCount) {
671 m_docStatus = PDF_DATAAVAIL_DONE;
672 return true;
673 }
674 }
675 return true;
676 }
677
LoadDocPage(uint32_t dwPage)678 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
679 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
680 int32_t iPage = safePage.ValueOrDie();
681 if (m_pDocument->GetPageCount() <= iPage ||
682 m_pDocument->IsPageLoaded(iPage)) {
683 m_docStatus = PDF_DATAAVAIL_DONE;
684 return true;
685 }
686 if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
687 m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
688 return true;
689 }
690 int32_t iCount = -1;
691 return CheckPageNode(m_PageNode, iPage, iCount, 0);
692 }
693
CheckPageCount()694 bool CPDF_DataAvail::CheckPageCount() {
695 bool bExists = false;
696 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
697 if (!bExists) {
698 m_docStatus = PDF_DATAAVAIL_ERROR;
699 return false;
700 }
701 if (!pPages)
702 return false;
703
704 CPDF_Dictionary* pPagesDict = pPages->GetDict();
705 if (!pPagesDict) {
706 m_docStatus = PDF_DATAAVAIL_ERROR;
707 return false;
708 }
709 if (!pPagesDict->KeyExist("Kids"))
710 return true;
711
712 return pPagesDict->GetIntegerFor("Count") > 0;
713 }
714
LoadDocPages()715 bool CPDF_DataAvail::LoadDocPages() {
716 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
717 return false;
718
719 if (CheckPageCount()) {
720 m_docStatus = PDF_DATAAVAIL_PAGE;
721 return true;
722 }
723
724 m_bTotalLoadPageTree = true;
725 return false;
726 }
727
LoadPages()728 bool CPDF_DataAvail::LoadPages() {
729 while (!m_bPagesTreeLoad) {
730 if (!CheckPageStatus())
731 return false;
732 }
733
734 if (m_bPagesLoad)
735 return true;
736
737 m_pDocument->LoadPages();
738 return false;
739 }
740
CheckLinearizedData()741 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
742 if (m_bLinearedDataOK)
743 return DataAvailable;
744 ASSERT(m_pLinearized);
745 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
746 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
747 return DataError;
748 }
749
750 if (!m_bMainXRefLoadTried) {
751 const FX_SAFE_FILESIZE prev =
752 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
753 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
754 if (main_xref_offset < 0)
755 return DataError;
756
757 if (main_xref_offset == 0)
758 return DataAvailable;
759
760 FX_SAFE_SIZE_T data_size = m_dwFileLen;
761 data_size -= main_xref_offset;
762 if (!data_size.IsValid())
763 return DataError;
764
765 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
766 main_xref_offset, data_size.ValueOrDie()))
767 return DataNotAvailable;
768
769 CPDF_Parser::Error eRet =
770 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
771 m_bMainXRefLoadTried = true;
772 if (eRet != CPDF_Parser::SUCCESS)
773 return DataError;
774
775 if (!PreparePageItem())
776 return DataNotAvailable;
777
778 m_bMainXRefLoadedOK = true;
779 m_bLinearedDataOK = true;
780 }
781
782 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
783 }
784
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)785 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
786 uint32_t dwPage,
787 DownloadHints* pHints) {
788 if (!m_pDocument)
789 return DataError;
790
791 const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
792 if (!safePage.IsValid())
793 return DataError;
794
795 if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
796 // This is XFA page.
797 return DataAvailable;
798 }
799
800 if (IsFirstCheck(dwPage)) {
801 m_bCurPageDictLoadOK = false;
802 }
803
804 if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
805 return DataAvailable;
806
807 const HintsScope hints_scope(GetValidator(), pHints);
808 if (m_pLinearized) {
809 if (dwPage == m_pLinearized->GetFirstPageNo()) {
810 auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
811 if (!pPageDict)
812 return DataError;
813
814 auto page_num_obj = std::make_pair(
815 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
816 GetValidator(), m_pDocument.Get(), pPageDict));
817
818 CPDF_PageObjectAvail* page_obj_avail =
819 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
820 // TODO(art-snake): Check resources.
821 return page_obj_avail->CheckAvail();
822 }
823
824 DocAvailStatus nResult = CheckLinearizedData();
825 if (nResult != DataAvailable)
826 return nResult;
827
828 if (m_pHintTables) {
829 nResult = m_pHintTables->CheckPage(dwPage);
830 if (nResult != DataAvailable)
831 return nResult;
832 if (GetPageDictionary(dwPage)) {
833 m_pagesLoadState.insert(dwPage);
834 return DataAvailable;
835 }
836 }
837
838 if (!m_bMainXRefLoadedOK) {
839 if (!LoadAllFile())
840 return DataNotAvailable;
841 m_pDocument->GetParser()->RebuildCrossRef();
842 ResetFirstCheck(dwPage);
843 return DataAvailable;
844 }
845 if (m_bTotalLoadPageTree) {
846 if (!LoadPages())
847 return DataNotAvailable;
848 } else {
849 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
850 return DataNotAvailable;
851 }
852 } else {
853 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
854 return DataNotAvailable;
855 }
856 }
857
858 if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
859 return DataNotAvailable;
860
861 auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
862 if (!pPageDict)
863 return DataError;
864
865 {
866 auto page_num_obj = std::make_pair(
867 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
868 GetValidator(), m_pDocument.Get(), pPageDict));
869 CPDF_PageObjectAvail* page_obj_avail =
870 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
871 const DocAvailStatus status = page_obj_avail->CheckAvail();
872 if (status != DocAvailStatus::DataAvailable)
873 return status;
874 }
875
876 const DocAvailStatus resources_status = CheckResources(pPageDict);
877 if (resources_status != DocAvailStatus::DataAvailable)
878 return resources_status;
879
880 m_bCurPageDictLoadOK = false;
881 ResetFirstCheck(dwPage);
882 m_pagesLoadState.insert(dwPage);
883 return DataAvailable;
884 }
885
CheckResources(CPDF_Dictionary * page)886 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
887 CPDF_Dictionary* page) {
888 ASSERT(page);
889 const CPDF_ReadValidator::Session read_session(GetValidator());
890 CPDF_Object* resources = GetResourceObject(page);
891 if (GetValidator()->has_read_problems())
892 return DocAvailStatus::DataNotAvailable;
893
894 if (!resources)
895 return DocAvailStatus::DataAvailable;
896
897 CPDF_PageObjectAvail* resource_avail =
898 m_PagesResourcesAvail
899 .insert(std::make_pair(
900 resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
901 GetValidator(), m_pDocument.Get(), resources)))
902 .first->second.get();
903 return resource_avail->CheckAvail();
904 }
905
GetValidator() const906 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
907 return m_pFileRead;
908 }
909
GetSyntaxParser() const910 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
911 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
912 : m_parser.m_pSyntax.get();
913 }
914
GetPageCount() const915 int CPDF_DataAvail::GetPageCount() const {
916 if (m_pLinearized)
917 return m_pLinearized->GetPageCount();
918 return m_pDocument ? m_pDocument->GetPageCount() : 0;
919 }
920
GetPageDictionary(int index) const921 CPDF_Dictionary* CPDF_DataAvail::GetPageDictionary(int index) const {
922 if (!m_pDocument || index < 0 || index >= GetPageCount())
923 return nullptr;
924 CPDF_Dictionary* page = m_pDocument->GetPageDictionary(index);
925 if (page)
926 return page;
927 if (!m_pLinearized || !m_pHintTables)
928 return nullptr;
929
930 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
931 return nullptr;
932 FX_FILESIZE szPageStartPos = 0;
933 FX_FILESIZE szPageLength = 0;
934 uint32_t dwObjNum = 0;
935 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
936 &szPageLength, &dwObjNum);
937 if (!bPagePosGot || !dwObjNum)
938 return nullptr;
939 // We should say to the document, which object is the page.
940 m_pDocument->SetPageObjNum(index, dwObjNum);
941 // Page object already can be parsed in document.
942 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
943 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
944 dwObjNum,
945 ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument.Get()));
946 }
947 if (!ValidatePage(index))
948 return nullptr;
949 return m_pDocument->GetPageDictionary(index);
950 }
951
IsFormAvail(DownloadHints * pHints)952 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
953 DownloadHints* pHints) {
954 const HintsScope hints_scope(GetValidator(), pHints);
955 return CheckAcroForm();
956 }
957
CheckAcroForm()958 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
959 if (!m_pDocument)
960 return FormAvailable;
961
962 if (m_pLinearized) {
963 DocAvailStatus nDocStatus = CheckLinearizedData();
964 if (nDocStatus == DataError)
965 return FormError;
966 if (nDocStatus == DataNotAvailable)
967 return FormNotAvailable;
968 }
969
970 if (!m_pFormAvail) {
971 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
972 if (!pRoot)
973 return FormAvailable;
974
975 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
976 if (!pAcroForm)
977 return FormNotExist;
978
979 m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
980 GetValidator(), m_pDocument.Get(), pAcroForm);
981 }
982 switch (m_pFormAvail->CheckAvail()) {
983 case DocAvailStatus::DataError:
984 return DocFormStatus::FormError;
985 case DocAvailStatus::DataNotAvailable:
986 return DocFormStatus::FormNotAvailable;
987 case DocAvailStatus::DataAvailable:
988 return DocFormStatus::FormAvailable;
989 default:
990 NOTREACHED();
991 }
992 return DocFormStatus::FormError;
993 }
994
ValidatePage(uint32_t dwPage) const995 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
996 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
997 auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
998 if (!pPageDict)
999 return false;
1000 CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument.Get(), pPageDict);
1001 return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
1002 }
1003
1004 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const char * password)1005 CPDF_DataAvail::ParseDocument(
1006 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1007 std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1008 const char* password) {
1009 if (m_pDocument) {
1010 // We already returned parsed document.
1011 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1012 }
1013 auto document = pdfium::MakeUnique<CPDF_Document>(std::move(pRenderData),
1014 std::move(pPageData));
1015 document->AddObserver(this);
1016
1017 CPDF_ReadValidator::Session read_session(GetValidator());
1018 CPDF_Parser::Error error =
1019 document->LoadLinearizedDoc(GetValidator(), password);
1020
1021 // Additional check, that all ok.
1022 if (GetValidator()->has_read_problems()) {
1023 NOTREACHED();
1024 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1025 }
1026
1027 if (error != CPDF_Parser::SUCCESS)
1028 return std::make_pair(error, nullptr);
1029
1030 m_pDocument = document.get();
1031 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1032 }
1033
PageNode()1034 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1035
~PageNode()1036 CPDF_DataAvail::PageNode::~PageNode() {}
1037