1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "core/fxcrt/stl_util.h"
31 #include "third_party/base/check.h"
32 #include "third_party/base/containers/contains.h"
33 #include "third_party/base/notreached.h"
34 #include "third_party/base/numerics/safe_conversions.h"
35
36 namespace {
37
GetResourceObject(RetainPtr<CPDF_Dictionary> pDict)38 RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> pDict) {
39 constexpr size_t kMaxHierarchyDepth = 64;
40 size_t depth = 0;
41
42 while (pDict) {
43 RetainPtr<CPDF_Object> result = pDict->GetMutableObjectFor("Resources");
44 if (result)
45 return result;
46 if (++depth > kMaxHierarchyDepth) {
47 // We have cycle in parents hierarchy.
48 return nullptr;
49 }
50 RetainPtr<CPDF_Object> parent = pDict->GetMutableObjectFor("Parent");
51 pDict = parent ? parent->GetMutableDict() : nullptr;
52 }
53 return nullptr;
54 }
55
56 class HintsScope {
57 public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)58 HintsScope(RetainPtr<CPDF_ReadValidator> validator,
59 CPDF_DataAvail::DownloadHints* hints)
60 : validator_(std::move(validator)) {
61 DCHECK(validator_);
62 validator_->SetDownloadHints(hints);
63 }
64
~HintsScope()65 ~HintsScope() { validator_->SetDownloadHints(nullptr); }
66
67 private:
68 RetainPtr<CPDF_ReadValidator> validator_;
69 };
70
71 } // namespace
72
// Out-of-line defaulted destructor for the FileAvail nested type.
CPDF_DataAvail::FileAvail::~FileAvail() = default;

// Out-of-line defaulted destructor for the DownloadHints nested type.
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
76
// Wraps |pFileRead| in a CPDF_ReadValidator constructed with |pFileAvail|,
// and records the size reported by that validator as the file length.
// NOTE(review): m_dwFileLen is initialized from m_pFileRead, so this relies
// on m_pFileRead being declared before m_dwFileLen in the class - confirm in
// the header.
CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
                               RetainPtr<IFX_SeekableReadStream> pFileRead)
    : m_pFileRead(pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileRead),
                                                         pFileAvail)),
      m_dwFileLen(m_pFileRead->GetSize()) {}
82
~CPDF_DataAvail()83 CPDF_DataAvail::~CPDF_DataAvail() {
84 m_pHintTables.reset();
85 if (m_pDocument)
86 m_pDocument->RemoveObserver(this);
87 }
88
// Observer callback: forgets the document pointer and drops the per-document
// state held by this object (form availability checker, collected pages
// array, and the per-page object/resource availability maps).
void CPDF_DataAvail::OnObservableDestroyed() {
  m_pDocument = nullptr;
  m_pFormAvail.reset();
  m_PagesArray.clear();
  m_PagesObjAvail.clear();
  m_PagesResourcesAvail.clear();
}
96
IsDocAvail(DownloadHints * pHints)97 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
98 DownloadHints* pHints) {
99 if (!m_dwFileLen)
100 return kDataError;
101
102 DCHECK(m_SeenPageObjList.empty());
103 AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
104 const HintsScope hints_scope(GetValidator(), pHints);
105 while (!m_bDocAvail) {
106 if (!CheckDocStatus())
107 return kDataNotAvailable;
108 }
109
110 return kDataAvailable;
111 }
112
CheckDocStatus()113 bool CPDF_DataAvail::CheckDocStatus() {
114 switch (m_internalStatus) {
115 case InternalStatus::kHeader:
116 return CheckHeader();
117 case InternalStatus::kFirstPage:
118 return CheckFirstPage();
119 case InternalStatus::kHintTable:
120 return CheckHintTables();
121 case InternalStatus::kLoadAllCrossRef:
122 return CheckAndLoadAllXref();
123 case InternalStatus::kLoadAllFile:
124 return LoadAllFile();
125 case InternalStatus::kRoot:
126 return CheckRoot();
127 case InternalStatus::kInfo:
128 return CheckInfo();
129 case InternalStatus::kPageTree:
130 if (m_bTotalLoadPageTree)
131 return CheckPages();
132 return LoadDocPages();
133 case InternalStatus::kPage:
134 if (m_bTotalLoadPageTree)
135 return CheckPage();
136 m_internalStatus = InternalStatus::kPageLaterLoad;
137 return true;
138 case InternalStatus::kError:
139 return LoadAllFile();
140 case InternalStatus::kPageLaterLoad:
141 m_internalStatus = InternalStatus::kPage;
142 [[fallthrough]];
143 default:
144 m_bDocAvail = true;
145 return true;
146 }
147 }
148
CheckPageStatus()149 bool CPDF_DataAvail::CheckPageStatus() {
150 switch (m_internalStatus) {
151 case InternalStatus::kPageTree:
152 return CheckPages();
153 case InternalStatus::kPage:
154 return CheckPage();
155 case InternalStatus::kError:
156 return LoadAllFile();
157 default:
158 m_bPagesTreeLoad = true;
159 m_bPagesLoad = true;
160 return true;
161 }
162 }
163
LoadAllFile()164 bool CPDF_DataAvail::LoadAllFile() {
165 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166 m_internalStatus = InternalStatus::kDone;
167 return true;
168 }
169 return false;
170 }
171
CheckAndLoadAllXref()172 bool CPDF_DataAvail::CheckAndLoadAllXref() {
173 if (!m_pCrossRefAvail) {
174 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
175 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
176 if (GetValidator()->has_read_problems())
177 return false;
178
179 if (last_xref_offset <= 0) {
180 m_internalStatus = InternalStatus::kError;
181 return false;
182 }
183
184 m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
185 last_xref_offset);
186 }
187
188 switch (m_pCrossRefAvail->CheckAvail()) {
189 case kDataAvailable:
190 break;
191 case kDataNotAvailable:
192 return false;
193 case kDataError:
194 m_internalStatus = InternalStatus::kError;
195 return false;
196 default:
197 NOTREACHED();
198 return false;
199 }
200
201 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
202 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
203 m_internalStatus = InternalStatus::kLoadAllFile;
204 return false;
205 }
206
207 m_internalStatus = InternalStatus::kRoot;
208 return true;
209 }
210
GetObject(uint32_t objnum,bool * pExistInFile)211 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
212 bool* pExistInFile) {
213 *pExistInFile = false;
214 CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
215 if (!pParser)
216 return nullptr;
217
218 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
219 RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
220 if (!pRet)
221 return nullptr;
222
223 *pExistInFile = true;
224 if (GetValidator()->has_read_problems())
225 return nullptr;
226
227 return pRet;
228 }
229
CheckInfo()230 bool CPDF_DataAvail::CheckInfo() {
231 const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
232 if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
233 m_internalStatus = InternalStatus::kPageTree;
234 return true;
235 }
236
237 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
238 m_parser.ParseIndirectObject(dwInfoObjNum);
239 if (GetValidator()->has_read_problems())
240 return false;
241
242 m_internalStatus = InternalStatus::kPageTree;
243 return true;
244 }
245
CheckRoot()246 bool CPDF_DataAvail::CheckRoot() {
247 const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
248 if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
249 m_internalStatus = InternalStatus::kError;
250 return true;
251 }
252
253 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
254 m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
255 if (GetValidator()->has_read_problems())
256 return false;
257
258 if (!m_pRoot) {
259 m_internalStatus = InternalStatus::kError;
260 return false;
261 }
262
263 RetainPtr<const CPDF_Reference> pRef =
264 ToReference(m_pRoot->GetObjectFor("Pages"));
265 if (!pRef) {
266 m_internalStatus = InternalStatus::kError;
267 return false;
268 }
269
270 m_PagesObjNum = pRef->GetRefObjNum();
271 m_internalStatus = InternalStatus::kInfo;
272 return true;
273 }
274
PreparePageItem()275 bool CPDF_DataAvail::PreparePageItem() {
276 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
277 if (!pRoot) {
278 m_internalStatus = InternalStatus::kError;
279 return false;
280 }
281
282 RetainPtr<const CPDF_Reference> pRef =
283 ToReference(pRoot->GetObjectFor("Pages"));
284 if (!pRef) {
285 m_internalStatus = InternalStatus::kError;
286 return false;
287 }
288
289 m_PagesObjNum = pRef->GetRefObjNum();
290 m_internalStatus = InternalStatus::kPageTree;
291 return true;
292 }
293
IsFirstCheck(uint32_t dwPage)294 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
295 return m_pageMapCheckState.insert(dwPage).second;
296 }
297
// Removes |dwPage| from the check-state set, so that the next IsFirstCheck()
// call for this page reports a first check again.
void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
  m_pageMapCheckState.erase(dwPage);
}
301
CheckPage()302 bool CPDF_DataAvail::CheckPage() {
303 std::vector<uint32_t> UnavailObjList;
304 for (uint32_t dwPageObjNum : m_PageObjList) {
305 bool bExists = false;
306 RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
307 if (!pObj) {
308 if (bExists)
309 UnavailObjList.push_back(dwPageObjNum);
310 continue;
311 }
312
313 switch (pObj->GetType()) {
314 case CPDF_Object::kArray: {
315 CPDF_ArrayLocker locker(pObj->AsArray());
316 for (const auto& pArrayObj : locker) {
317 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
318 if (pRef)
319 UnavailObjList.push_back(pRef->GetRefObjNum());
320 }
321 break;
322 }
323 case CPDF_Object::kDictionary:
324 if (pObj->GetDict()->GetNameFor("Type") == "Pages")
325 m_PagesArray.push_back(std::move(pObj));
326 break;
327 default:
328 break;
329 }
330 }
331 m_PageObjList.clear();
332 if (!UnavailObjList.empty()) {
333 m_PageObjList = std::move(UnavailObjList);
334 return false;
335 }
336 size_t iPages = m_PagesArray.size();
337 for (size_t i = 0; i < iPages; ++i) {
338 RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
339 if (pPages && !GetPageKids(pPages.Get())) {
340 m_PagesArray.clear();
341 m_internalStatus = InternalStatus::kError;
342 return false;
343 }
344 }
345 m_PagesArray.clear();
346 if (m_PageObjList.empty())
347 m_internalStatus = InternalStatus::kDone;
348
349 return true;
350 }
351
GetPageKids(CPDF_Object * pPages)352 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
353 RetainPtr<const CPDF_Dictionary> pDict = pPages->GetDict();
354 if (!pDict)
355 return true;
356
357 RetainPtr<const CPDF_Object> pKids = pDict->GetObjectFor("Kids");
358 if (!pKids)
359 return true;
360
361 std::vector<uint32_t> object_numbers;
362 switch (pKids->GetType()) {
363 case CPDF_Object::kReference:
364 object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
365 break;
366 case CPDF_Object::kArray: {
367 CPDF_ArrayLocker locker(pKids->AsArray());
368 for (const auto& pArrayObj : locker) {
369 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
370 if (pRef)
371 object_numbers.push_back(pRef->GetRefObjNum());
372 }
373 break;
374 }
375 default:
376 m_internalStatus = InternalStatus::kError;
377 return false;
378 }
379
380 for (uint32_t num : object_numbers) {
381 bool inserted = m_SeenPageObjList.insert(num).second;
382 if (inserted)
383 m_PageObjList.push_back(num);
384 }
385 return true;
386 }
387
CheckPages()388 bool CPDF_DataAvail::CheckPages() {
389 bool bExists = false;
390 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
391 if (!bExists) {
392 m_internalStatus = InternalStatus::kLoadAllFile;
393 return true;
394 }
395
396 if (!pPages) {
397 if (m_internalStatus == InternalStatus::kError) {
398 m_internalStatus = InternalStatus::kLoadAllFile;
399 return true;
400 }
401 return false;
402 }
403
404 if (!GetPageKids(pPages.Get())) {
405 m_internalStatus = InternalStatus::kError;
406 return false;
407 }
408
409 m_internalStatus = InternalStatus::kPage;
410 return true;
411 }
412
CheckHeader()413 bool CPDF_DataAvail::CheckHeader() {
414 switch (CheckHeaderAndLinearized()) {
415 case kDataAvailable:
416 m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
417 : InternalStatus::kLoadAllCrossRef;
418 return true;
419 case kDataNotAvailable:
420 return false;
421 case kDataError:
422 m_internalStatus = InternalStatus::kError;
423 return true;
424 default:
425 NOTREACHED();
426 return false;
427 }
428 }
429
CheckFirstPage()430 bool CPDF_DataAvail::CheckFirstPage() {
431 if (!m_pLinearized->GetFirstPageEndOffset() ||
432 !m_pLinearized->GetFileSize() ||
433 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
434 m_internalStatus = InternalStatus::kError;
435 return false;
436 }
437
438 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
439 dwEnd += 512;
440 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
441 dwEnd = (uint32_t)m_dwFileLen;
442
443 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
444 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
445 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
446 data_size))
447 return false;
448
449 m_internalStatus = InternalStatus::kHintTable;
450 return true;
451 }
452
CheckHintTables()453 bool CPDF_DataAvail::CheckHintTables() {
454 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
455 m_pHintTables =
456 CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
457
458 if (GetValidator()->read_error()) {
459 m_internalStatus = InternalStatus::kError;
460 return true;
461 }
462 if (GetValidator()->has_unavailable_data())
463 return false;
464
465 m_internalStatus = InternalStatus::kDone;
466 return true;
467 }
468
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const469 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
470 FX_FILESIZE pos,
471 uint32_t objnum,
472 CPDF_IndirectObjectHolder* pObjList) const {
473 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
474 GetSyntaxParser()->SetPos(pos);
475 RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
476 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
477 GetSyntaxParser()->SetPos(SavedPos);
478 return (result && (!objnum || result->GetObjNum() == objnum))
479 ? std::move(result)
480 : nullptr;
481 }
482
IsLinearizedPDF()483 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
484 switch (CheckHeaderAndLinearized()) {
485 case kDataAvailable:
486 return m_pLinearized ? kLinearized : kNotLinearized;
487 case kDataNotAvailable:
488 return kLinearizationUnknown;
489 case kDataError:
490 return kNotLinearized;
491 default:
492 NOTREACHED();
493 return kLinearizationUnknown;
494 }
495 }
496
CheckHeaderAndLinearized()497 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
498 if (m_bHeaderAvail)
499 return kDataAvailable;
500
501 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
502 const absl::optional<FX_FILESIZE> header_offset =
503 GetHeaderOffset(GetValidator());
504 if (GetValidator()->has_read_problems())
505 return kDataNotAvailable;
506
507 if (!header_offset.has_value())
508 return kDataError;
509
510 m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
511 GetValidator(), header_offset.value());
512 m_pLinearized = m_parser.ParseLinearizedHeader();
513 if (GetValidator()->has_read_problems())
514 return kDataNotAvailable;
515
516 m_bHeaderAvail = true;
517 return kDataAvailable;
518 }
519
CheckPage(uint32_t dwPage)520 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
521 while (true) {
522 switch (m_internalStatus) {
523 case InternalStatus::kPageTree:
524 if (!LoadDocPages())
525 return false;
526 break;
527 case InternalStatus::kPage:
528 if (!LoadDocPage(dwPage))
529 return false;
530 break;
531 case InternalStatus::kError:
532 return LoadAllFile();
533 default:
534 m_bPagesTreeLoad = true;
535 m_bPagesLoad = true;
536 m_bCurPageDictLoadOK = true;
537 m_internalStatus = InternalStatus::kPage;
538 return true;
539 }
540 }
541 }
542
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)543 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
544 PageNode* pPageNode) {
545 bool bExists = false;
546 RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
547 if (!bExists) {
548 m_internalStatus = InternalStatus::kError;
549 return false;
550 }
551
552 if (!pPages)
553 return false;
554
555 const CPDF_Array* pArray = pPages->AsArray();
556 if (!pArray) {
557 m_internalStatus = InternalStatus::kError;
558 return false;
559 }
560
561 pPageNode->m_type = PageNode::Type::kPages;
562 for (size_t i = 0; i < pArray->size(); ++i) {
563 RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
564 if (!pKid)
565 continue;
566
567 auto pNode = std::make_unique<PageNode>();
568 pNode->m_dwPageNo = pKid->GetRefObjNum();
569 pPageNode->m_ChildNodes.push_back(std::move(pNode));
570 }
571 return true;
572 }
573
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)574 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
575 PageNode* pPageNode) {
576 bool bExists = false;
577 RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
578 if (!bExists) {
579 m_internalStatus = InternalStatus::kError;
580 return false;
581 }
582
583 if (!pPage)
584 return false;
585
586 if (pPage->IsArray()) {
587 pPageNode->m_dwPageNo = dwPageNo;
588 pPageNode->m_type = PageNode::Type::kArray;
589 return true;
590 }
591
592 if (!pPage->IsDictionary()) {
593 m_internalStatus = InternalStatus::kError;
594 return false;
595 }
596
597 pPageNode->m_dwPageNo = dwPageNo;
598 RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
599 const ByteString type = pDict->GetNameFor("Type");
600 if (type == "Page") {
601 pPageNode->m_type = PageNode::Type::kPage;
602 return true;
603 }
604
605 if (type != "Pages") {
606 m_internalStatus = InternalStatus::kError;
607 return false;
608 }
609
610 pPageNode->m_type = PageNode::Type::kPages;
611 RetainPtr<CPDF_Object> pKids = pDict->GetMutableObjectFor("Kids");
612 if (!pKids) {
613 m_internalStatus = InternalStatus::kPage;
614 return true;
615 }
616
617 switch (pKids->GetType()) {
618 case CPDF_Object::kReference: {
619 const CPDF_Reference* pKid = pKids->AsReference();
620 auto pNode = std::make_unique<PageNode>();
621 pNode->m_dwPageNo = pKid->GetRefObjNum();
622 pPageNode->m_ChildNodes.push_back(std::move(pNode));
623 break;
624 }
625 case CPDF_Object::kArray: {
626 const CPDF_Array* pKidsArray = pKids->AsArray();
627 for (size_t i = 0; i < pKidsArray->size(); ++i) {
628 RetainPtr<const CPDF_Reference> pKid =
629 ToReference(pKidsArray->GetObjectAt(i));
630 if (!pKid)
631 continue;
632
633 auto pNode = std::make_unique<PageNode>();
634 pNode->m_dwPageNo = pKid->GetRefObjNum();
635 pPageNode->m_ChildNodes.push_back(std::move(pNode));
636 }
637 break;
638 }
639 default:
640 break;
641 }
642 return true;
643 }
644
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)645 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
646 int32_t iPage,
647 int32_t& iCount,
648 int level) {
649 if (level >= kMaxPageRecursionDepth)
650 return false;
651
652 int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
653 if (iSize <= 0 || iPage >= iSize) {
654 m_internalStatus = InternalStatus::kError;
655 return false;
656 }
657 for (int32_t i = 0; i < iSize; ++i) {
658 PageNode* pNode = pageNode.m_ChildNodes[i].get();
659 if (!pNode)
660 continue;
661
662 if (pNode->m_type == PageNode::Type::kUnknown) {
663 // Updates the type for the unknown page node.
664 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
665 return false;
666 }
667 if (pNode->m_type == PageNode::Type::kArray) {
668 // Updates a more specific type for the array page node.
669 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
670 return false;
671 }
672 switch (pNode->m_type) {
673 case PageNode::Type::kPage:
674 iCount++;
675 if (iPage == iCount && m_pDocument)
676 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
677 break;
678 case PageNode::Type::kPages:
679 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
680 return false;
681 break;
682 case PageNode::Type::kUnknown:
683 case PageNode::Type::kArray:
684 // Already converted above, error if we get here.
685 return false;
686 }
687 if (iPage == iCount) {
688 m_internalStatus = InternalStatus::kDone;
689 return true;
690 }
691 }
692 return true;
693 }
694
LoadDocPage(uint32_t dwPage)695 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
696 int iPage = pdfium::base::checked_cast<int>(dwPage);
697 if (m_pDocument->GetPageCount() <= iPage ||
698 m_pDocument->IsPageLoaded(iPage)) {
699 m_internalStatus = InternalStatus::kDone;
700 return true;
701 }
702 if (m_PageNode.m_type == PageNode::Type::kPage) {
703 m_internalStatus =
704 iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
705 return true;
706 }
707 int32_t iCount = -1;
708 return CheckPageNode(m_PageNode, iPage, iCount, 0);
709 }
710
CheckPageCount()711 bool CPDF_DataAvail::CheckPageCount() {
712 bool bExists = false;
713 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
714 if (!bExists) {
715 m_internalStatus = InternalStatus::kError;
716 return false;
717 }
718 if (!pPages)
719 return false;
720
721 RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
722 if (!pPagesDict) {
723 m_internalStatus = InternalStatus::kError;
724 return false;
725 }
726 if (!pPagesDict->KeyExist("Kids"))
727 return true;
728
729 return pPagesDict->GetIntegerFor("Count") > 0;
730 }
731
LoadDocPages()732 bool CPDF_DataAvail::LoadDocPages() {
733 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
734 return false;
735
736 if (CheckPageCount()) {
737 m_internalStatus = InternalStatus::kPage;
738 return true;
739 }
740
741 m_bTotalLoadPageTree = true;
742 return false;
743 }
744
LoadPages()745 bool CPDF_DataAvail::LoadPages() {
746 while (!m_bPagesTreeLoad) {
747 if (!CheckPageStatus())
748 return false;
749 }
750
751 if (m_bPagesLoad)
752 return true;
753
754 m_pDocument->LoadPages();
755 return false;
756 }
757
CheckLinearizedData()758 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
759 if (m_bLinearedDataOK)
760 return kDataAvailable;
761 DCHECK(m_pLinearized);
762 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
763 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
764 return kDataError;
765 }
766
767 if (!m_bMainXRefLoadTried) {
768 const FX_SAFE_FILESIZE prev =
769 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
770 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
771 if (main_xref_offset < 0)
772 return kDataError;
773
774 if (main_xref_offset == 0)
775 return kDataAvailable;
776
777 FX_SAFE_SIZE_T data_size = m_dwFileLen;
778 data_size -= main_xref_offset;
779 if (!data_size.IsValid())
780 return kDataError;
781
782 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
783 main_xref_offset, data_size.ValueOrDie()))
784 return kDataNotAvailable;
785
786 CPDF_Parser::Error eRet =
787 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
788 m_bMainXRefLoadTried = true;
789 if (eRet != CPDF_Parser::SUCCESS)
790 return kDataError;
791
792 if (!PreparePageItem())
793 return kDataNotAvailable;
794
795 m_bMainXRefLoadedOK = true;
796 m_bLinearedDataOK = true;
797 }
798
799 return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
800 }
801
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)802 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
803 uint32_t dwPage,
804 DownloadHints* pHints) {
805 if (!m_pDocument)
806 return kDataError;
807
808 const int iPage = pdfium::base::checked_cast<int>(dwPage);
809 if (iPage >= m_pDocument->GetPageCount()) {
810 // This is XFA page.
811 return kDataAvailable;
812 }
813
814 if (IsFirstCheck(dwPage)) {
815 m_bCurPageDictLoadOK = false;
816 }
817
818 if (pdfium::Contains(m_pagesLoadState, dwPage))
819 return kDataAvailable;
820
821 const HintsScope hints_scope(GetValidator(), pHints);
822 if (m_pLinearized) {
823 if (dwPage == m_pLinearized->GetFirstPageNo()) {
824 RetainPtr<const CPDF_Dictionary> pPageDict =
825 m_pDocument->GetPageDictionary(iPage);
826 if (!pPageDict)
827 return kDataError;
828
829 auto page_num_obj =
830 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
831 GetValidator(), m_pDocument, pPageDict));
832
833 CPDF_PageObjectAvail* page_obj_avail =
834 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
835 // TODO(art-snake): Check resources.
836 return page_obj_avail->CheckAvail();
837 }
838
839 DocAvailStatus nResult = CheckLinearizedData();
840 if (nResult != kDataAvailable)
841 return nResult;
842
843 if (m_pHintTables) {
844 nResult = m_pHintTables->CheckPage(dwPage);
845 if (nResult != kDataAvailable)
846 return nResult;
847 if (GetPageDictionary(dwPage)) {
848 m_pagesLoadState.insert(dwPage);
849 return kDataAvailable;
850 }
851 }
852
853 if (!m_bMainXRefLoadedOK) {
854 if (!LoadAllFile())
855 return kDataNotAvailable;
856 m_pDocument->GetParser()->RebuildCrossRef();
857 ResetFirstCheck(dwPage);
858 return kDataAvailable;
859 }
860 if (m_bTotalLoadPageTree) {
861 if (!LoadPages())
862 return kDataNotAvailable;
863 } else {
864 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
865 return kDataNotAvailable;
866 }
867 } else {
868 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
869 return kDataNotAvailable;
870 }
871 }
872
873 if (CheckAcroForm() == kFormNotAvailable)
874 return kDataNotAvailable;
875
876 RetainPtr<CPDF_Dictionary> pPageDict =
877 m_pDocument->GetMutablePageDictionary(iPage);
878 if (!pPageDict)
879 return kDataError;
880
881 {
882 auto page_num_obj =
883 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
884 GetValidator(), m_pDocument, pPageDict));
885 CPDF_PageObjectAvail* page_obj_avail =
886 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
887 const DocAvailStatus status = page_obj_avail->CheckAvail();
888 if (status != kDataAvailable)
889 return status;
890 }
891
892 const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
893 if (resources_status != kDataAvailable)
894 return resources_status;
895
896 m_bCurPageDictLoadOK = false;
897 ResetFirstCheck(dwPage);
898 m_pagesLoadState.insert(dwPage);
899 return kDataAvailable;
900 }
901
CheckResources(RetainPtr<CPDF_Dictionary> page)902 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
903 RetainPtr<CPDF_Dictionary> page) {
904 DCHECK(page);
905 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
906 RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
907 if (GetValidator()->has_read_problems())
908 return kDataNotAvailable;
909
910 if (!resources)
911 return kDataAvailable;
912
913 CPDF_PageObjectAvail* resource_avail =
914 m_PagesResourcesAvail
915 .insert(std::make_pair(resources,
916 std::make_unique<CPDF_PageObjectAvail>(
917 GetValidator(), m_pDocument, resources)))
918 .first->second.get();
919 return resource_avail->CheckAvail();
920 }
921
// Returns the read validator that wraps the file stream passed to the
// constructor.
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
  return m_pFileRead;
}
925
GetSyntaxParser() const926 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
927 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
928 : m_parser.m_pSyntax.get();
929 }
930
GetPageCount() const931 int CPDF_DataAvail::GetPageCount() const {
932 if (m_pLinearized)
933 return m_pLinearized->GetPageCount();
934 return m_pDocument ? m_pDocument->GetPageCount() : 0;
935 }
936
GetPageDictionary(int index) const937 RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
938 int index) const {
939 if (!m_pDocument || index < 0 || index >= GetPageCount())
940 return nullptr;
941 RetainPtr<const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
942 if (page)
943 return page;
944 if (!m_pLinearized || !m_pHintTables)
945 return nullptr;
946
947 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
948 return nullptr;
949 FX_FILESIZE szPageStartPos = 0;
950 FX_FILESIZE szPageLength = 0;
951 uint32_t dwObjNum = 0;
952 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
953 &szPageLength, &dwObjNum);
954 if (!bPagePosGot || !dwObjNum)
955 return nullptr;
956 // We should say to the document, which object is the page.
957 m_pDocument->SetPageObjNum(index, dwObjNum);
958 // Page object already can be parsed in document.
959 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
960 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
961 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
962 }
963 if (!ValidatePage(index))
964 return nullptr;
965 return m_pDocument->GetPageDictionary(index);
966 }
967
// Reports the availability of the document's AcroForm data. |pHints| is
// installed on the validator for the duration of the check.
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
    DownloadHints* pHints) {
  // Declared before the check so the hints stay installed while it runs.
  const HintsScope hints_scope(GetValidator(), pHints);
  return CheckAcroForm();
}
973
CheckAcroForm()974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
975 if (!m_pDocument)
976 return kFormAvailable;
977
978 if (m_pLinearized) {
979 DocAvailStatus nDocStatus = CheckLinearizedData();
980 if (nDocStatus == kDataError)
981 return kFormError;
982 if (nDocStatus == kDataNotAvailable)
983 return kFormNotAvailable;
984 }
985
986 if (!m_pFormAvail) {
987 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
988 if (!pRoot)
989 return kFormAvailable;
990
991 RetainPtr<const CPDF_Object> pAcroForm = pRoot->GetObjectFor("AcroForm");
992 if (!pAcroForm)
993 return kFormNotExist;
994
995 m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
996 GetValidator(), m_pDocument, std::move(pAcroForm));
997 }
998 switch (m_pFormAvail->CheckAvail()) {
999 case kDataError:
1000 return kFormError;
1001 case kDataNotAvailable:
1002 return kFormNotAvailable;
1003 case kDataAvailable:
1004 return kFormAvailable;
1005 default:
1006 NOTREACHED();
1007 }
1008 return kFormError;
1009 }
1010
ValidatePage(uint32_t dwPage) const1011 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
1012 int iPage = pdfium::base::checked_cast<int>(dwPage);
1013 RetainPtr<const CPDF_Dictionary> pPageDict =
1014 m_pDocument->GetPageDictionary(iPage);
1015 if (!pPageDict)
1016 return false;
1017
1018 CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1019 std::move(pPageDict));
1020 return obj_avail.CheckAvail() == kDataAvailable;
1021 }
1022
1023 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const ByteString & password)1024 CPDF_DataAvail::ParseDocument(
1025 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1026 std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1027 const ByteString& password) {
1028 if (m_pDocument) {
1029 // We already returned parsed document.
1030 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1031 }
1032 auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
1033 std::move(pPageData));
1034 document->AddObserver(this);
1035
1036 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
1037 CPDF_Parser::Error error =
1038 document->LoadLinearizedDoc(GetValidator(), password);
1039
1040 // Additional check, that all ok.
1041 if (GetValidator()->has_read_problems()) {
1042 NOTREACHED();
1043 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1044 }
1045
1046 if (error != CPDF_Parser::SUCCESS)
1047 return std::make_pair(error, nullptr);
1048
1049 m_pDocument = document.get();
1050 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1051 }
1052
// Out-of-line defaulted constructor for the PageNode nested type.
CPDF_DataAvail::PageNode::PageNode() = default;

// Out-of-line defaulted destructor for the PageNode nested type.
CPDF_DataAvail::PageNode::~PageNode() = default;
1056