• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "core/fxcrt/stl_util.h"
31 #include "third_party/base/check.h"
32 #include "third_party/base/containers/contains.h"
33 #include "third_party/base/notreached.h"
34 #include "third_party/base/numerics/safe_conversions.h"
35 
36 namespace {
37 
GetResourceObject(RetainPtr<CPDF_Dictionary> pDict)38 RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> pDict) {
39   constexpr size_t kMaxHierarchyDepth = 64;
40   size_t depth = 0;
41 
42   while (pDict) {
43     RetainPtr<CPDF_Object> result = pDict->GetMutableObjectFor("Resources");
44     if (result)
45       return result;
46     if (++depth > kMaxHierarchyDepth) {
47       // We have cycle in parents hierarchy.
48       return nullptr;
49     }
50     RetainPtr<CPDF_Object> parent = pDict->GetMutableObjectFor("Parent");
51     pDict = parent ? parent->GetMutableDict() : nullptr;
52   }
53   return nullptr;
54 }
55 
56 class HintsScope {
57  public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)58   HintsScope(RetainPtr<CPDF_ReadValidator> validator,
59              CPDF_DataAvail::DownloadHints* hints)
60       : validator_(std::move(validator)) {
61     DCHECK(validator_);
62     validator_->SetDownloadHints(hints);
63   }
64 
~HintsScope()65   ~HintsScope() { validator_->SetDownloadHints(nullptr); }
66 
67  private:
68   RetainPtr<CPDF_ReadValidator> validator_;
69 };
70 
71 }  // namespace
72 
// Out-of-line defaulted destructor for the nested FileAvail type.
CPDF_DataAvail::FileAvail::~FileAvail() = default;

// Out-of-line defaulted destructor for the nested DownloadHints type.
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
76 
// Wraps |pFileRead| and |pFileAvail| in a CPDF_ReadValidator and records the
// size reported by that validator as the file length.
// NOTE(review): |m_dwFileLen| is initialized through |m_pFileRead|, so this
// relies on |m_pFileRead| being declared before |m_dwFileLen| in the class -
// confirm against the header.
CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
                               RetainPtr<IFX_SeekableReadStream> pFileRead)
    : m_pFileRead(pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileRead),
                                                         pFileAvail)),
      m_dwFileLen(m_pFileRead->GetSize()) {}
82 
~CPDF_DataAvail()83 CPDF_DataAvail::~CPDF_DataAvail() {
84   m_pHintTables.reset();
85   if (m_pDocument)
86     m_pDocument->RemoveObserver(this);
87 }
88 
// Observer callback: forgets the document pointer and drops the form
// availability checker and the cached per-page containers.
// NOTE(review): presumably these members reference data owned by the
// document, which is why they are released here - confirm against the header.
void CPDF_DataAvail::OnObservableDestroyed() {
  m_pDocument = nullptr;
  m_pFormAvail.reset();
  m_PagesArray.clear();
  m_PagesObjAvail.clear();
  m_PagesResourcesAvail.clear();
}
96 
IsDocAvail(DownloadHints * pHints)97 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
98     DownloadHints* pHints) {
99   if (!m_dwFileLen)
100     return kDataError;
101 
102   DCHECK(m_SeenPageObjList.empty());
103   AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
104   const HintsScope hints_scope(GetValidator(), pHints);
105   while (!m_bDocAvail) {
106     if (!CheckDocStatus())
107       return kDataNotAvailable;
108   }
109 
110   return kDataAvailable;
111 }
112 
CheckDocStatus()113 bool CPDF_DataAvail::CheckDocStatus() {
114   switch (m_internalStatus) {
115     case InternalStatus::kHeader:
116       return CheckHeader();
117     case InternalStatus::kFirstPage:
118       return CheckFirstPage();
119     case InternalStatus::kHintTable:
120       return CheckHintTables();
121     case InternalStatus::kLoadAllCrossRef:
122       return CheckAndLoadAllXref();
123     case InternalStatus::kLoadAllFile:
124       return LoadAllFile();
125     case InternalStatus::kRoot:
126       return CheckRoot();
127     case InternalStatus::kInfo:
128       return CheckInfo();
129     case InternalStatus::kPageTree:
130       if (m_bTotalLoadPageTree)
131         return CheckPages();
132       return LoadDocPages();
133     case InternalStatus::kPage:
134       if (m_bTotalLoadPageTree)
135         return CheckPage();
136       m_internalStatus = InternalStatus::kPageLaterLoad;
137       return true;
138     case InternalStatus::kError:
139       return LoadAllFile();
140     case InternalStatus::kPageLaterLoad:
141       m_internalStatus = InternalStatus::kPage;
142       [[fallthrough]];
143     default:
144       m_bDocAvail = true;
145       return true;
146   }
147 }
148 
CheckPageStatus()149 bool CPDF_DataAvail::CheckPageStatus() {
150   switch (m_internalStatus) {
151     case InternalStatus::kPageTree:
152       return CheckPages();
153     case InternalStatus::kPage:
154       return CheckPage();
155     case InternalStatus::kError:
156       return LoadAllFile();
157     default:
158       m_bPagesTreeLoad = true;
159       m_bPagesLoad = true;
160       return true;
161   }
162 }
163 
LoadAllFile()164 bool CPDF_DataAvail::LoadAllFile() {
165   if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166     m_internalStatus = InternalStatus::kDone;
167     return true;
168   }
169   return false;
170 }
171 
CheckAndLoadAllXref()172 bool CPDF_DataAvail::CheckAndLoadAllXref() {
173   if (!m_pCrossRefAvail) {
174     CPDF_ReadValidator::ScopedSession read_session(GetValidator());
175     const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
176     if (GetValidator()->has_read_problems())
177       return false;
178 
179     if (last_xref_offset <= 0) {
180       m_internalStatus = InternalStatus::kError;
181       return false;
182     }
183 
184     m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
185                                                             last_xref_offset);
186   }
187 
188   switch (m_pCrossRefAvail->CheckAvail()) {
189     case kDataAvailable:
190       break;
191     case kDataNotAvailable:
192       return false;
193     case kDataError:
194       m_internalStatus = InternalStatus::kError;
195       return false;
196     default:
197       NOTREACHED();
198       return false;
199   }
200 
201   if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
202       !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
203     m_internalStatus = InternalStatus::kLoadAllFile;
204     return false;
205   }
206 
207   m_internalStatus = InternalStatus::kRoot;
208   return true;
209 }
210 
GetObject(uint32_t objnum,bool * pExistInFile)211 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
212                                                  bool* pExistInFile) {
213   *pExistInFile = false;
214   CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
215   if (!pParser)
216     return nullptr;
217 
218   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
219   RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
220   if (!pRet)
221     return nullptr;
222 
223   *pExistInFile = true;
224   if (GetValidator()->has_read_problems())
225     return nullptr;
226 
227   return pRet;
228 }
229 
CheckInfo()230 bool CPDF_DataAvail::CheckInfo() {
231   const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
232   if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
233     m_internalStatus = InternalStatus::kPageTree;
234     return true;
235   }
236 
237   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
238   m_parser.ParseIndirectObject(dwInfoObjNum);
239   if (GetValidator()->has_read_problems())
240     return false;
241 
242   m_internalStatus = InternalStatus::kPageTree;
243   return true;
244 }
245 
CheckRoot()246 bool CPDF_DataAvail::CheckRoot() {
247   const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
248   if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
249     m_internalStatus = InternalStatus::kError;
250     return true;
251   }
252 
253   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
254   m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
255   if (GetValidator()->has_read_problems())
256     return false;
257 
258   if (!m_pRoot) {
259     m_internalStatus = InternalStatus::kError;
260     return false;
261   }
262 
263   RetainPtr<const CPDF_Reference> pRef =
264       ToReference(m_pRoot->GetObjectFor("Pages"));
265   if (!pRef) {
266     m_internalStatus = InternalStatus::kError;
267     return false;
268   }
269 
270   m_PagesObjNum = pRef->GetRefObjNum();
271   m_internalStatus = InternalStatus::kInfo;
272   return true;
273 }
274 
PreparePageItem()275 bool CPDF_DataAvail::PreparePageItem() {
276   const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
277   if (!pRoot) {
278     m_internalStatus = InternalStatus::kError;
279     return false;
280   }
281 
282   RetainPtr<const CPDF_Reference> pRef =
283       ToReference(pRoot->GetObjectFor("Pages"));
284   if (!pRef) {
285     m_internalStatus = InternalStatus::kError;
286     return false;
287   }
288 
289   m_PagesObjNum = pRef->GetRefObjNum();
290   m_internalStatus = InternalStatus::kPageTree;
291   return true;
292 }
293 
IsFirstCheck(uint32_t dwPage)294 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
295   return m_pageMapCheckState.insert(dwPage).second;
296 }
297 
// Removes |dwPage| from |m_pageMapCheckState|, so that a later
// IsFirstCheck(dwPage) returns true again.
void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
  m_pageMapCheckState.erase(dwPage);
}
301 
CheckPage()302 bool CPDF_DataAvail::CheckPage() {
303   std::vector<uint32_t> UnavailObjList;
304   for (uint32_t dwPageObjNum : m_PageObjList) {
305     bool bExists = false;
306     RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
307     if (!pObj) {
308       if (bExists)
309         UnavailObjList.push_back(dwPageObjNum);
310       continue;
311     }
312 
313     switch (pObj->GetType()) {
314       case CPDF_Object::kArray: {
315         CPDF_ArrayLocker locker(pObj->AsArray());
316         for (const auto& pArrayObj : locker) {
317           const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
318           if (pRef)
319             UnavailObjList.push_back(pRef->GetRefObjNum());
320         }
321         break;
322       }
323       case CPDF_Object::kDictionary:
324         if (pObj->GetDict()->GetNameFor("Type") == "Pages")
325           m_PagesArray.push_back(std::move(pObj));
326         break;
327       default:
328         break;
329     }
330   }
331   m_PageObjList.clear();
332   if (!UnavailObjList.empty()) {
333     m_PageObjList = std::move(UnavailObjList);
334     return false;
335   }
336   size_t iPages = m_PagesArray.size();
337   for (size_t i = 0; i < iPages; ++i) {
338     RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
339     if (pPages && !GetPageKids(pPages.Get())) {
340       m_PagesArray.clear();
341       m_internalStatus = InternalStatus::kError;
342       return false;
343     }
344   }
345   m_PagesArray.clear();
346   if (m_PageObjList.empty())
347     m_internalStatus = InternalStatus::kDone;
348 
349   return true;
350 }
351 
GetPageKids(CPDF_Object * pPages)352 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
353   RetainPtr<const CPDF_Dictionary> pDict = pPages->GetDict();
354   if (!pDict)
355     return true;
356 
357   RetainPtr<const CPDF_Object> pKids = pDict->GetObjectFor("Kids");
358   if (!pKids)
359     return true;
360 
361   std::vector<uint32_t> object_numbers;
362   switch (pKids->GetType()) {
363     case CPDF_Object::kReference:
364       object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
365       break;
366     case CPDF_Object::kArray: {
367       CPDF_ArrayLocker locker(pKids->AsArray());
368       for (const auto& pArrayObj : locker) {
369         const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
370         if (pRef)
371           object_numbers.push_back(pRef->GetRefObjNum());
372       }
373       break;
374     }
375     default:
376       m_internalStatus = InternalStatus::kError;
377       return false;
378   }
379 
380   for (uint32_t num : object_numbers) {
381     bool inserted = m_SeenPageObjList.insert(num).second;
382     if (inserted)
383       m_PageObjList.push_back(num);
384   }
385   return true;
386 }
387 
CheckPages()388 bool CPDF_DataAvail::CheckPages() {
389   bool bExists = false;
390   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
391   if (!bExists) {
392     m_internalStatus = InternalStatus::kLoadAllFile;
393     return true;
394   }
395 
396   if (!pPages) {
397     if (m_internalStatus == InternalStatus::kError) {
398       m_internalStatus = InternalStatus::kLoadAllFile;
399       return true;
400     }
401     return false;
402   }
403 
404   if (!GetPageKids(pPages.Get())) {
405     m_internalStatus = InternalStatus::kError;
406     return false;
407   }
408 
409   m_internalStatus = InternalStatus::kPage;
410   return true;
411 }
412 
CheckHeader()413 bool CPDF_DataAvail::CheckHeader() {
414   switch (CheckHeaderAndLinearized()) {
415     case kDataAvailable:
416       m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
417                                        : InternalStatus::kLoadAllCrossRef;
418       return true;
419     case kDataNotAvailable:
420       return false;
421     case kDataError:
422       m_internalStatus = InternalStatus::kError;
423       return true;
424     default:
425       NOTREACHED();
426       return false;
427   }
428 }
429 
CheckFirstPage()430 bool CPDF_DataAvail::CheckFirstPage() {
431   if (!m_pLinearized->GetFirstPageEndOffset() ||
432       !m_pLinearized->GetFileSize() ||
433       !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
434     m_internalStatus = InternalStatus::kError;
435     return false;
436   }
437 
438   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
439   dwEnd += 512;
440   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
441     dwEnd = (uint32_t)m_dwFileLen;
442 
443   const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
444   const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
445   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
446                                                              data_size))
447     return false;
448 
449   m_internalStatus = InternalStatus::kHintTable;
450   return true;
451 }
452 
CheckHintTables()453 bool CPDF_DataAvail::CheckHintTables() {
454   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
455   m_pHintTables =
456       CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
457 
458   if (GetValidator()->read_error()) {
459     m_internalStatus = InternalStatus::kError;
460     return true;
461   }
462   if (GetValidator()->has_unavailable_data())
463     return false;
464 
465   m_internalStatus = InternalStatus::kDone;
466   return true;
467 }
468 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const469 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
470     FX_FILESIZE pos,
471     uint32_t objnum,
472     CPDF_IndirectObjectHolder* pObjList) const {
473   const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
474   GetSyntaxParser()->SetPos(pos);
475   RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
476       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
477   GetSyntaxParser()->SetPos(SavedPos);
478   return (result && (!objnum || result->GetObjNum() == objnum))
479              ? std::move(result)
480              : nullptr;
481 }
482 
IsLinearizedPDF()483 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
484   switch (CheckHeaderAndLinearized()) {
485     case kDataAvailable:
486       return m_pLinearized ? kLinearized : kNotLinearized;
487     case kDataNotAvailable:
488       return kLinearizationUnknown;
489     case kDataError:
490       return kNotLinearized;
491     default:
492       NOTREACHED();
493       return kLinearizationUnknown;
494   }
495 }
496 
CheckHeaderAndLinearized()497 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
498   if (m_bHeaderAvail)
499     return kDataAvailable;
500 
501   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
502   const absl::optional<FX_FILESIZE> header_offset =
503       GetHeaderOffset(GetValidator());
504   if (GetValidator()->has_read_problems())
505     return kDataNotAvailable;
506 
507   if (!header_offset.has_value())
508     return kDataError;
509 
510   m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
511       GetValidator(), header_offset.value());
512   m_pLinearized = m_parser.ParseLinearizedHeader();
513   if (GetValidator()->has_read_problems())
514     return kDataNotAvailable;
515 
516   m_bHeaderAvail = true;
517   return kDataAvailable;
518 }
519 
CheckPage(uint32_t dwPage)520 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
521   while (true) {
522     switch (m_internalStatus) {
523       case InternalStatus::kPageTree:
524         if (!LoadDocPages())
525           return false;
526         break;
527       case InternalStatus::kPage:
528         if (!LoadDocPage(dwPage))
529           return false;
530         break;
531       case InternalStatus::kError:
532         return LoadAllFile();
533       default:
534         m_bPagesTreeLoad = true;
535         m_bPagesLoad = true;
536         m_bCurPageDictLoadOK = true;
537         m_internalStatus = InternalStatus::kPage;
538         return true;
539     }
540   }
541 }
542 
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)543 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
544                                         PageNode* pPageNode) {
545   bool bExists = false;
546   RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
547   if (!bExists) {
548     m_internalStatus = InternalStatus::kError;
549     return false;
550   }
551 
552   if (!pPages)
553     return false;
554 
555   const CPDF_Array* pArray = pPages->AsArray();
556   if (!pArray) {
557     m_internalStatus = InternalStatus::kError;
558     return false;
559   }
560 
561   pPageNode->m_type = PageNode::Type::kPages;
562   for (size_t i = 0; i < pArray->size(); ++i) {
563     RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
564     if (!pKid)
565       continue;
566 
567     auto pNode = std::make_unique<PageNode>();
568     pNode->m_dwPageNo = pKid->GetRefObjNum();
569     pPageNode->m_ChildNodes.push_back(std::move(pNode));
570   }
571   return true;
572 }
573 
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)574 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
575                                           PageNode* pPageNode) {
576   bool bExists = false;
577   RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
578   if (!bExists) {
579     m_internalStatus = InternalStatus::kError;
580     return false;
581   }
582 
583   if (!pPage)
584     return false;
585 
586   if (pPage->IsArray()) {
587     pPageNode->m_dwPageNo = dwPageNo;
588     pPageNode->m_type = PageNode::Type::kArray;
589     return true;
590   }
591 
592   if (!pPage->IsDictionary()) {
593     m_internalStatus = InternalStatus::kError;
594     return false;
595   }
596 
597   pPageNode->m_dwPageNo = dwPageNo;
598   RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
599   const ByteString type = pDict->GetNameFor("Type");
600   if (type == "Page") {
601     pPageNode->m_type = PageNode::Type::kPage;
602     return true;
603   }
604 
605   if (type != "Pages") {
606     m_internalStatus = InternalStatus::kError;
607     return false;
608   }
609 
610   pPageNode->m_type = PageNode::Type::kPages;
611   RetainPtr<CPDF_Object> pKids = pDict->GetMutableObjectFor("Kids");
612   if (!pKids) {
613     m_internalStatus = InternalStatus::kPage;
614     return true;
615   }
616 
617   switch (pKids->GetType()) {
618     case CPDF_Object::kReference: {
619       const CPDF_Reference* pKid = pKids->AsReference();
620       auto pNode = std::make_unique<PageNode>();
621       pNode->m_dwPageNo = pKid->GetRefObjNum();
622       pPageNode->m_ChildNodes.push_back(std::move(pNode));
623       break;
624     }
625     case CPDF_Object::kArray: {
626       const CPDF_Array* pKidsArray = pKids->AsArray();
627       for (size_t i = 0; i < pKidsArray->size(); ++i) {
628         RetainPtr<const CPDF_Reference> pKid =
629             ToReference(pKidsArray->GetObjectAt(i));
630         if (!pKid)
631           continue;
632 
633         auto pNode = std::make_unique<PageNode>();
634         pNode->m_dwPageNo = pKid->GetRefObjNum();
635         pPageNode->m_ChildNodes.push_back(std::move(pNode));
636       }
637       break;
638     }
639     default:
640       break;
641   }
642   return true;
643 }
644 
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)645 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
646                                    int32_t iPage,
647                                    int32_t& iCount,
648                                    int level) {
649   if (level >= kMaxPageRecursionDepth)
650     return false;
651 
652   int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
653   if (iSize <= 0 || iPage >= iSize) {
654     m_internalStatus = InternalStatus::kError;
655     return false;
656   }
657   for (int32_t i = 0; i < iSize; ++i) {
658     PageNode* pNode = pageNode.m_ChildNodes[i].get();
659     if (!pNode)
660       continue;
661 
662     if (pNode->m_type == PageNode::Type::kUnknown) {
663       // Updates the type for the unknown page node.
664       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
665         return false;
666     }
667     if (pNode->m_type == PageNode::Type::kArray) {
668       // Updates a more specific type for the array page node.
669       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
670         return false;
671     }
672     switch (pNode->m_type) {
673       case PageNode::Type::kPage:
674         iCount++;
675         if (iPage == iCount && m_pDocument)
676           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
677         break;
678       case PageNode::Type::kPages:
679         if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
680           return false;
681         break;
682       case PageNode::Type::kUnknown:
683       case PageNode::Type::kArray:
684         // Already converted above, error if we get here.
685         return false;
686     }
687     if (iPage == iCount) {
688       m_internalStatus = InternalStatus::kDone;
689       return true;
690     }
691   }
692   return true;
693 }
694 
LoadDocPage(uint32_t dwPage)695 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
696   int iPage = pdfium::base::checked_cast<int>(dwPage);
697   if (m_pDocument->GetPageCount() <= iPage ||
698       m_pDocument->IsPageLoaded(iPage)) {
699     m_internalStatus = InternalStatus::kDone;
700     return true;
701   }
702   if (m_PageNode.m_type == PageNode::Type::kPage) {
703     m_internalStatus =
704         iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
705     return true;
706   }
707   int32_t iCount = -1;
708   return CheckPageNode(m_PageNode, iPage, iCount, 0);
709 }
710 
CheckPageCount()711 bool CPDF_DataAvail::CheckPageCount() {
712   bool bExists = false;
713   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
714   if (!bExists) {
715     m_internalStatus = InternalStatus::kError;
716     return false;
717   }
718   if (!pPages)
719     return false;
720 
721   RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
722   if (!pPagesDict) {
723     m_internalStatus = InternalStatus::kError;
724     return false;
725   }
726   if (!pPagesDict->KeyExist("Kids"))
727     return true;
728 
729   return pPagesDict->GetIntegerFor("Count") > 0;
730 }
731 
LoadDocPages()732 bool CPDF_DataAvail::LoadDocPages() {
733   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
734     return false;
735 
736   if (CheckPageCount()) {
737     m_internalStatus = InternalStatus::kPage;
738     return true;
739   }
740 
741   m_bTotalLoadPageTree = true;
742   return false;
743 }
744 
LoadPages()745 bool CPDF_DataAvail::LoadPages() {
746   while (!m_bPagesTreeLoad) {
747     if (!CheckPageStatus())
748       return false;
749   }
750 
751   if (m_bPagesLoad)
752     return true;
753 
754   m_pDocument->LoadPages();
755   return false;
756 }
757 
CheckLinearizedData()758 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
759   if (m_bLinearedDataOK)
760     return kDataAvailable;
761   DCHECK(m_pLinearized);
762   if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
763       !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
764     return kDataError;
765   }
766 
767   if (!m_bMainXRefLoadTried) {
768     const FX_SAFE_FILESIZE prev =
769         m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
770     const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
771     if (main_xref_offset < 0)
772       return kDataError;
773 
774     if (main_xref_offset == 0)
775       return kDataAvailable;
776 
777     FX_SAFE_SIZE_T data_size = m_dwFileLen;
778     data_size -= main_xref_offset;
779     if (!data_size.IsValid())
780       return kDataError;
781 
782     if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
783             main_xref_offset, data_size.ValueOrDie()))
784       return kDataNotAvailable;
785 
786     CPDF_Parser::Error eRet =
787         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
788     m_bMainXRefLoadTried = true;
789     if (eRet != CPDF_Parser::SUCCESS)
790       return kDataError;
791 
792     if (!PreparePageItem())
793       return kDataNotAvailable;
794 
795     m_bMainXRefLoadedOK = true;
796     m_bLinearedDataOK = true;
797   }
798 
799   return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
800 }
801 
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)802 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
803     uint32_t dwPage,
804     DownloadHints* pHints) {
805   if (!m_pDocument)
806     return kDataError;
807 
808   const int iPage = pdfium::base::checked_cast<int>(dwPage);
809   if (iPage >= m_pDocument->GetPageCount()) {
810     // This is XFA page.
811     return kDataAvailable;
812   }
813 
814   if (IsFirstCheck(dwPage)) {
815     m_bCurPageDictLoadOK = false;
816   }
817 
818   if (pdfium::Contains(m_pagesLoadState, dwPage))
819     return kDataAvailable;
820 
821   const HintsScope hints_scope(GetValidator(), pHints);
822   if (m_pLinearized) {
823     if (dwPage == m_pLinearized->GetFirstPageNo()) {
824       RetainPtr<const CPDF_Dictionary> pPageDict =
825           m_pDocument->GetPageDictionary(iPage);
826       if (!pPageDict)
827         return kDataError;
828 
829       auto page_num_obj =
830           std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
831                                      GetValidator(), m_pDocument, pPageDict));
832 
833       CPDF_PageObjectAvail* page_obj_avail =
834           m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
835       // TODO(art-snake): Check resources.
836       return page_obj_avail->CheckAvail();
837     }
838 
839     DocAvailStatus nResult = CheckLinearizedData();
840     if (nResult != kDataAvailable)
841       return nResult;
842 
843     if (m_pHintTables) {
844       nResult = m_pHintTables->CheckPage(dwPage);
845       if (nResult != kDataAvailable)
846         return nResult;
847       if (GetPageDictionary(dwPage)) {
848         m_pagesLoadState.insert(dwPage);
849         return kDataAvailable;
850       }
851     }
852 
853     if (!m_bMainXRefLoadedOK) {
854       if (!LoadAllFile())
855         return kDataNotAvailable;
856       m_pDocument->GetParser()->RebuildCrossRef();
857       ResetFirstCheck(dwPage);
858       return kDataAvailable;
859     }
860     if (m_bTotalLoadPageTree) {
861       if (!LoadPages())
862         return kDataNotAvailable;
863     } else {
864       if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
865         return kDataNotAvailable;
866     }
867   } else {
868     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
869       return kDataNotAvailable;
870     }
871   }
872 
873   if (CheckAcroForm() == kFormNotAvailable)
874     return kDataNotAvailable;
875 
876   RetainPtr<CPDF_Dictionary> pPageDict =
877       m_pDocument->GetMutablePageDictionary(iPage);
878   if (!pPageDict)
879     return kDataError;
880 
881   {
882     auto page_num_obj =
883         std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
884                                    GetValidator(), m_pDocument, pPageDict));
885     CPDF_PageObjectAvail* page_obj_avail =
886         m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
887     const DocAvailStatus status = page_obj_avail->CheckAvail();
888     if (status != kDataAvailable)
889       return status;
890   }
891 
892   const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
893   if (resources_status != kDataAvailable)
894     return resources_status;
895 
896   m_bCurPageDictLoadOK = false;
897   ResetFirstCheck(dwPage);
898   m_pagesLoadState.insert(dwPage);
899   return kDataAvailable;
900 }
901 
CheckResources(RetainPtr<CPDF_Dictionary> page)902 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
903     RetainPtr<CPDF_Dictionary> page) {
904   DCHECK(page);
905   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
906   RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
907   if (GetValidator()->has_read_problems())
908     return kDataNotAvailable;
909 
910   if (!resources)
911     return kDataAvailable;
912 
913   CPDF_PageObjectAvail* resource_avail =
914       m_PagesResourcesAvail
915           .insert(std::make_pair(resources,
916                                  std::make_unique<CPDF_PageObjectAvail>(
917                                      GetValidator(), m_pDocument, resources)))
918           .first->second.get();
919   return resource_avail->CheckAvail();
920 }
921 
// Returns the read validator created in the constructor (|m_pFileRead|).
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
  return m_pFileRead;
}
925 
GetSyntaxParser() const926 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
927   return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
928                      : m_parser.m_pSyntax.get();
929 }
930 
GetPageCount() const931 int CPDF_DataAvail::GetPageCount() const {
932   if (m_pLinearized)
933     return m_pLinearized->GetPageCount();
934   return m_pDocument ? m_pDocument->GetPageCount() : 0;
935 }
936 
GetPageDictionary(int index) const937 RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
938     int index) const {
939   if (!m_pDocument || index < 0 || index >= GetPageCount())
940     return nullptr;
941   RetainPtr<const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
942   if (page)
943     return page;
944   if (!m_pLinearized || !m_pHintTables)
945     return nullptr;
946 
947   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
948     return nullptr;
949   FX_FILESIZE szPageStartPos = 0;
950   FX_FILESIZE szPageLength = 0;
951   uint32_t dwObjNum = 0;
952   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
953                                                      &szPageLength, &dwObjNum);
954   if (!bPagePosGot || !dwObjNum)
955     return nullptr;
956   // We should say to the document, which object is the page.
957   m_pDocument->SetPageObjNum(index, dwObjNum);
958   // Page object already can be parsed in document.
959   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
960     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
961         dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
962   }
963   if (!ValidatePage(index))
964     return nullptr;
965   return m_pDocument->GetPageDictionary(index);
966 }
967 
IsFormAvail(DownloadHints * pHints)968 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
969     DownloadHints* pHints) {
970   const HintsScope hints_scope(GetValidator(), pHints);
971   return CheckAcroForm();
972 }
973 
CheckAcroForm()974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
975   if (!m_pDocument)
976     return kFormAvailable;
977 
978   if (m_pLinearized) {
979     DocAvailStatus nDocStatus = CheckLinearizedData();
980     if (nDocStatus == kDataError)
981       return kFormError;
982     if (nDocStatus == kDataNotAvailable)
983       return kFormNotAvailable;
984   }
985 
986   if (!m_pFormAvail) {
987     const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
988     if (!pRoot)
989       return kFormAvailable;
990 
991     RetainPtr<const CPDF_Object> pAcroForm = pRoot->GetObjectFor("AcroForm");
992     if (!pAcroForm)
993       return kFormNotExist;
994 
995     m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
996         GetValidator(), m_pDocument, std::move(pAcroForm));
997   }
998   switch (m_pFormAvail->CheckAvail()) {
999     case kDataError:
1000       return kFormError;
1001     case kDataNotAvailable:
1002       return kFormNotAvailable;
1003     case kDataAvailable:
1004       return kFormAvailable;
1005     default:
1006       NOTREACHED();
1007   }
1008   return kFormError;
1009 }
1010 
ValidatePage(uint32_t dwPage) const1011 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
1012   int iPage = pdfium::base::checked_cast<int>(dwPage);
1013   RetainPtr<const CPDF_Dictionary> pPageDict =
1014       m_pDocument->GetPageDictionary(iPage);
1015   if (!pPageDict)
1016     return false;
1017 
1018   CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1019                                  std::move(pPageDict));
1020   return obj_avail.CheckAvail() == kDataAvailable;
1021 }
1022 
1023 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const ByteString & password)1024 CPDF_DataAvail::ParseDocument(
1025     std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1026     std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1027     const ByteString& password) {
1028   if (m_pDocument) {
1029     // We already returned parsed document.
1030     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1031   }
1032   auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
1033                                                   std::move(pPageData));
1034   document->AddObserver(this);
1035 
1036   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
1037   CPDF_Parser::Error error =
1038       document->LoadLinearizedDoc(GetValidator(), password);
1039 
1040   // Additional check, that all ok.
1041   if (GetValidator()->has_read_problems()) {
1042     NOTREACHED();
1043     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1044   }
1045 
1046   if (error != CPDF_Parser::SUCCESS)
1047     return std::make_pair(error, nullptr);
1048 
1049   m_pDocument = document.get();
1050   return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1051 }
1052 
1053 CPDF_DataAvail::PageNode::PageNode() = default;
1054 
1055 CPDF_DataAvail::PageNode::~PageNode() = default;
1056