• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
24 #include "core/fxcrt/fx_ext.h"
25 #include "core/fxcrt/fx_safe_types.h"
26 #include "third_party/base/numerics/safe_conversions.h"
27 #include "third_party/base/ptr_util.h"
28 #include "third_party/base/stl_util.h"
29 
~FileAvail()30 CPDF_DataAvail::FileAvail::~FileAvail() {}
31 
~DownloadHints()32 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
33 
34 // static
35 int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
36 
CPDF_DataAvail(FileAvail * pFileAvail,const CFX_RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)37 CPDF_DataAvail::CPDF_DataAvail(
38     FileAvail* pFileAvail,
39     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
40     bool bSupportHintTable)
41     : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {
42   m_Pos = 0;
43   m_dwFileLen = 0;
44   if (m_pFileRead) {
45     m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
46   }
47   m_dwCurrentOffset = 0;
48   m_dwXRefOffset = 0;
49   m_dwTrailerOffset = 0;
50   m_bufferOffset = 0;
51   m_bufferSize = 0;
52   m_PagesObjNum = 0;
53   m_dwCurrentXRefSteam = 0;
54   m_dwAcroFormObjNum = 0;
55   m_dwInfoObjNum = 0;
56   m_pDocument = 0;
57   m_dwEncryptObjNum = 0;
58   m_dwPrevXRefOffset = 0;
59   m_dwLastXRefOffset = 0;
60   m_bDocAvail = false;
61   m_bMainXRefLoadTried = false;
62   m_bDocAvail = false;
63   m_bPagesLoad = false;
64   m_bPagesTreeLoad = false;
65   m_bMainXRefLoadedOK = false;
66   m_bAnnotsLoad = false;
67   m_bHaveAcroForm = false;
68   m_bAcroFormLoad = false;
69   m_bPageLoadedOK = false;
70   m_bNeedDownLoadResource = false;
71   m_bLinearizedFormParamLoad = false;
72   m_pTrailer = nullptr;
73   m_pCurrentParser = nullptr;
74   m_pAcroForm = nullptr;
75   m_pPageDict = nullptr;
76   m_pPageResource = nullptr;
77   m_docStatus = PDF_DATAAVAIL_HEADER;
78   m_bTotalLoadPageTree = false;
79   m_bCurPageDictLoadOK = false;
80   m_bLinearedDataOK = false;
81   m_bSupportHintTable = bSupportHintTable;
82 }
83 
~CPDF_DataAvail()84 CPDF_DataAvail::~CPDF_DataAvail() {
85   m_pHintTables.reset();
86   for (CPDF_Object* pObject : m_arrayAcroforms)
87     delete pObject;
88 }
89 
SetDocument(CPDF_Document * pDoc)90 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) {
91   m_pDocument = pDoc;
92 }
93 
GetObjectSize(uint32_t objnum,FX_FILESIZE & offset)94 uint32_t CPDF_DataAvail::GetObjectSize(uint32_t objnum, FX_FILESIZE& offset) {
95   CPDF_Parser* pParser = m_pDocument->GetParser();
96   if (!pParser || !pParser->IsValidObjectNumber(objnum))
97     return 0;
98 
99   if (pParser->GetObjectType(objnum) == 2)
100     objnum = pParser->GetObjectPositionOrZero(objnum);
101 
102   if (pParser->GetObjectType(objnum) != 1 &&
103       pParser->GetObjectType(objnum) != 255) {
104     return 0;
105   }
106 
107   offset = pParser->GetObjectPositionOrZero(objnum);
108   if (offset == 0)
109     return 0;
110 
111   auto it = pParser->m_SortedOffset.find(offset);
112   if (it == pParser->m_SortedOffset.end() ||
113       ++it == pParser->m_SortedOffset.end()) {
114     return 0;
115   }
116   return *it - offset;
117 }
118 
AreObjectsAvailable(std::vector<CPDF_Object * > & obj_array,bool bParsePage,DownloadHints * pHints,std::vector<CPDF_Object * > & ret_array)119 bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array,
120                                          bool bParsePage,
121                                          DownloadHints* pHints,
122                                          std::vector<CPDF_Object*>& ret_array) {
123   if (obj_array.empty())
124     return true;
125 
126   uint32_t count = 0;
127   std::vector<CPDF_Object*> new_obj_array;
128   for (CPDF_Object* pObj : obj_array) {
129     if (!pObj)
130       continue;
131 
132     int32_t type = pObj->GetType();
133     switch (type) {
134       case CPDF_Object::ARRAY: {
135         CPDF_Array* pArray = pObj->AsArray();
136         for (size_t k = 0; k < pArray->GetCount(); ++k)
137           new_obj_array.push_back(pArray->GetObjectAt(k));
138       } break;
139       case CPDF_Object::STREAM:
140         pObj = pObj->GetDict();
141       case CPDF_Object::DICTIONARY: {
142         CPDF_Dictionary* pDict = pObj->GetDict();
143         if (pDict && pDict->GetStringFor("Type") == "Page" && !bParsePage)
144           continue;
145 
146         for (const auto& it : *pDict) {
147           if (it.first != "Parent")
148             new_obj_array.push_back(it.second.get());
149         }
150       } break;
151       case CPDF_Object::REFERENCE: {
152         CPDF_Reference* pRef = pObj->AsReference();
153         uint32_t dwNum = pRef->GetRefObjNum();
154 
155         FX_FILESIZE offset;
156         uint32_t size = GetObjectSize(dwNum, offset);
157         if (size == 0 || offset < 0 || offset >= m_dwFileLen)
158           break;
159 
160         if (!IsDataAvail(offset, size, pHints)) {
161           ret_array.push_back(pObj);
162           count++;
163         } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) {
164           m_ObjectSet.insert(dwNum);
165           CPDF_Object* pReferred =
166               m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
167           if (pReferred)
168             new_obj_array.push_back(pReferred);
169         }
170       } break;
171     }
172   }
173 
174   if (count > 0) {
175     for (CPDF_Object* pObj : new_obj_array) {
176       CPDF_Reference* pRef = pObj->AsReference();
177       if (pRef && pdfium::ContainsKey(m_ObjectSet, pRef->GetRefObjNum()))
178         continue;
179       ret_array.push_back(pObj);
180     }
181     return false;
182   }
183 
184   obj_array = new_obj_array;
185   return AreObjectsAvailable(obj_array, false, pHints, ret_array);
186 }
187 
IsDocAvail(DownloadHints * pHints)188 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
189     DownloadHints* pHints) {
190   if (!m_dwFileLen && m_pFileRead) {
191     m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
192     if (!m_dwFileLen)
193       return DataError;
194   }
195 
196   while (!m_bDocAvail) {
197     if (!CheckDocStatus(pHints))
198       return DataNotAvailable;
199   }
200 
201   return DataAvailable;
202 }
203 
CheckAcroFormSubObject(DownloadHints * pHints)204 bool CPDF_DataAvail::CheckAcroFormSubObject(DownloadHints* pHints) {
205   if (m_objs_array.empty()) {
206     m_ObjectSet.clear();
207     std::vector<CPDF_Object*> obj_array = m_arrayAcroforms;
208     if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
209       return false;
210 
211     m_objs_array.clear();
212     return true;
213   }
214 
215   std::vector<CPDF_Object*> new_objs_array;
216   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
217     m_objs_array = new_objs_array;
218     return false;
219   }
220 
221   for (CPDF_Object* pObject : m_arrayAcroforms)
222     delete pObject;
223 
224   m_arrayAcroforms.clear();
225   return true;
226 }
227 
CheckAcroForm(DownloadHints * pHints)228 bool CPDF_DataAvail::CheckAcroForm(DownloadHints* pHints) {
229   bool bExist = false;
230   m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist).release();
231   if (!bExist) {
232     m_docStatus = PDF_DATAAVAIL_PAGETREE;
233     return true;
234   }
235 
236   if (!m_pAcroForm) {
237     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
238       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
239       return true;
240     }
241     return false;
242   }
243 
244   m_arrayAcroforms.push_back(m_pAcroForm);
245   m_docStatus = PDF_DATAAVAIL_PAGETREE;
246   return true;
247 }
248 
CheckDocStatus(DownloadHints * pHints)249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
250   switch (m_docStatus) {
251     case PDF_DATAAVAIL_HEADER:
252       return CheckHeader(pHints);
253     case PDF_DATAAVAIL_FIRSTPAGE:
254       return CheckFirstPage(pHints);
255     case PDF_DATAAVAIL_HINTTABLE:
256       return CheckHintTables(pHints);
257     case PDF_DATAAVAIL_END:
258       return CheckEnd(pHints);
259     case PDF_DATAAVAIL_CROSSREF:
260       return CheckCrossRef(pHints);
261     case PDF_DATAAVAIL_CROSSREF_ITEM:
262       return CheckCrossRefItem(pHints);
263     case PDF_DATAAVAIL_CROSSREF_STREAM:
264       return CheckAllCrossRefStream(pHints);
265     case PDF_DATAAVAIL_TRAILER:
266       return CheckTrailer(pHints);
267     case PDF_DATAAVAIL_TRAILER_APPEND:
268       return CheckTrailerAppend(pHints);
269     case PDF_DATAAVAIL_LOADALLCROSSREF:
270       return LoadAllXref(pHints);
271     case PDF_DATAAVAIL_LOADALLFILE:
272       return LoadAllFile(pHints);
273     case PDF_DATAAVAIL_ROOT:
274       return CheckRoot(pHints);
275     case PDF_DATAAVAIL_INFO:
276       return CheckInfo(pHints);
277     case PDF_DATAAVAIL_ACROFORM:
278       return CheckAcroForm(pHints);
279     case PDF_DATAAVAIL_PAGETREE:
280       if (m_bTotalLoadPageTree)
281         return CheckPages(pHints);
282       return LoadDocPages(pHints);
283     case PDF_DATAAVAIL_PAGE:
284       if (m_bTotalLoadPageTree)
285         return CheckPage(pHints);
286       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
287       return true;
288     case PDF_DATAAVAIL_ERROR:
289       return LoadAllFile(pHints);
290     case PDF_DATAAVAIL_PAGE_LATERLOAD:
291       m_docStatus = PDF_DATAAVAIL_PAGE;
292     default:
293       m_bDocAvail = true;
294       return true;
295   }
296 }
297 
CheckPageStatus(DownloadHints * pHints)298 bool CPDF_DataAvail::CheckPageStatus(DownloadHints* pHints) {
299   switch (m_docStatus) {
300     case PDF_DATAAVAIL_PAGETREE:
301       return CheckPages(pHints);
302     case PDF_DATAAVAIL_PAGE:
303       return CheckPage(pHints);
304     case PDF_DATAAVAIL_ERROR:
305       return LoadAllFile(pHints);
306     default:
307       m_bPagesTreeLoad = true;
308       m_bPagesLoad = true;
309       return true;
310   }
311 }
312 
LoadAllFile(DownloadHints * pHints)313 bool CPDF_DataAvail::LoadAllFile(DownloadHints* pHints) {
314   if (m_pFileAvail->IsDataAvail(0, (uint32_t)m_dwFileLen)) {
315     m_docStatus = PDF_DATAAVAIL_DONE;
316     return true;
317   }
318 
319   pHints->AddSegment(0, (uint32_t)m_dwFileLen);
320   return false;
321 }
322 
LoadAllXref(DownloadHints * pHints)323 bool CPDF_DataAvail::LoadAllXref(DownloadHints* pHints) {
324   m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset);
325   if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
326       !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
327     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
328     return false;
329   }
330 
331   m_dwRootObjNum = m_parser.GetRootObjNum();
332   m_dwInfoObjNum = m_parser.GetInfoObjNum();
333   m_pCurrentParser = &m_parser;
334   m_docStatus = PDF_DATAAVAIL_ROOT;
335   return true;
336 }
337 
GetObject(uint32_t objnum,DownloadHints * pHints,bool * pExistInFile)338 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
339                                                        DownloadHints* pHints,
340                                                        bool* pExistInFile) {
341   uint32_t size = 0;
342   FX_FILESIZE offset = 0;
343   CPDF_Parser* pParser = nullptr;
344 
345   if (pExistInFile)
346     *pExistInFile = true;
347 
348   if (m_pDocument) {
349     size = GetObjectSize(objnum, offset);
350     pParser = m_pDocument->GetParser();
351   } else {
352     size = (uint32_t)m_parser.GetObjectSize(objnum);
353     offset = m_parser.GetObjectOffset(objnum);
354     pParser = &m_parser;
355   }
356 
357   if (!IsDataAvail(offset, size, pHints))
358     return nullptr;
359 
360   std::unique_ptr<CPDF_Object> pRet;
361   if (pParser)
362     pRet = pParser->ParseIndirectObject(nullptr, objnum);
363 
364   if (!pRet && pExistInFile)
365     *pExistInFile = false;
366 
367   return pRet;
368 }
369 
CheckInfo(DownloadHints * pHints)370 bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) {
371   bool bExist = false;
372   std::unique_ptr<CPDF_Object> pInfo =
373       GetObject(m_dwInfoObjNum, pHints, &bExist);
374   if (bExist && !pInfo) {
375     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
376       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
377       return true;
378     }
379     if (m_Pos == m_dwFileLen)
380       m_docStatus = PDF_DATAAVAIL_ERROR;
381     return false;
382   }
383   m_docStatus =
384       m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
385   return true;
386 }
387 
CheckRoot(DownloadHints * pHints)388 bool CPDF_DataAvail::CheckRoot(DownloadHints* pHints) {
389   bool bExist = false;
390   m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
391   if (!bExist) {
392     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
393     return true;
394   }
395 
396   if (!m_pRoot) {
397     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
398       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
399       return true;
400     }
401     return false;
402   }
403 
404   CPDF_Dictionary* pDict = m_pRoot->GetDict();
405   if (!pDict) {
406     m_docStatus = PDF_DATAAVAIL_ERROR;
407     return false;
408   }
409 
410   CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
411   if (!pRef) {
412     m_docStatus = PDF_DATAAVAIL_ERROR;
413     return false;
414   }
415 
416   m_PagesObjNum = pRef->GetRefObjNum();
417   CPDF_Reference* pAcroFormRef =
418       ToReference(m_pRoot->GetDict()->GetObjectFor("AcroForm"));
419   if (pAcroFormRef) {
420     m_bHaveAcroForm = true;
421     m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
422   }
423 
424   if (m_dwInfoObjNum) {
425     m_docStatus = PDF_DATAAVAIL_INFO;
426   } else {
427     m_docStatus =
428         m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
429   }
430   return true;
431 }
432 
PreparePageItem()433 bool CPDF_DataAvail::PreparePageItem() {
434   CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
435   CPDF_Reference* pRef =
436       ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
437   if (!pRef) {
438     m_docStatus = PDF_DATAAVAIL_ERROR;
439     return false;
440   }
441 
442   m_PagesObjNum = pRef->GetRefObjNum();
443   m_pCurrentParser = m_pDocument->GetParser();
444   m_docStatus = PDF_DATAAVAIL_PAGETREE;
445   return true;
446 }
447 
IsFirstCheck(uint32_t dwPage)448 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
449   return m_pageMapCheckState.insert(dwPage).second;
450 }
451 
ResetFirstCheck(uint32_t dwPage)452 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
453   m_pageMapCheckState.erase(dwPage);
454 }
455 
CheckPage(DownloadHints * pHints)456 bool CPDF_DataAvail::CheckPage(DownloadHints* pHints) {
457   std::vector<uint32_t> UnavailObjList;
458   for (uint32_t dwPageObjNum : m_PageObjList) {
459     bool bExists = false;
460     std::unique_ptr<CPDF_Object> pObj =
461         GetObject(dwPageObjNum, pHints, &bExists);
462     if (!pObj) {
463       if (bExists)
464         UnavailObjList.push_back(dwPageObjNum);
465       continue;
466     }
467     CPDF_Array* pArray = ToArray(pObj.get());
468     if (pArray) {
469       for (const auto& pArrayObj : *pArray) {
470         if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
471           UnavailObjList.push_back(pRef->GetRefObjNum());
472       }
473     }
474     if (!pObj->IsDictionary())
475       continue;
476 
477     CFX_ByteString type = pObj->GetDict()->GetStringFor("Type");
478     if (type == "Pages") {
479       m_PagesArray.push_back(std::move(pObj));
480       continue;
481     }
482   }
483   m_PageObjList.clear();
484   if (!UnavailObjList.empty()) {
485     m_PageObjList = std::move(UnavailObjList);
486     return false;
487   }
488   size_t iPages = m_PagesArray.size();
489   for (size_t i = 0; i < iPages; ++i) {
490     std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
491     if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
492       m_PagesArray.clear();
493       m_docStatus = PDF_DATAAVAIL_ERROR;
494       return false;
495     }
496   }
497   m_PagesArray.clear();
498   if (m_PageObjList.empty())
499     m_docStatus = PDF_DATAAVAIL_DONE;
500 
501   return true;
502 }
503 
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)504 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
505   if (!pParser) {
506     m_docStatus = PDF_DATAAVAIL_ERROR;
507     return false;
508   }
509 
510   CPDF_Dictionary* pDict = pPages->GetDict();
511   CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
512   if (!pKids)
513     return true;
514 
515   switch (pKids->GetType()) {
516     case CPDF_Object::REFERENCE:
517       m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
518       break;
519     case CPDF_Object::ARRAY: {
520       CPDF_Array* pKidsArray = pKids->AsArray();
521       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
522         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
523           m_PageObjList.push_back(pRef->GetRefObjNum());
524       }
525     } break;
526     default:
527       m_docStatus = PDF_DATAAVAIL_ERROR;
528       return false;
529   }
530   return true;
531 }
532 
CheckPages(DownloadHints * pHints)533 bool CPDF_DataAvail::CheckPages(DownloadHints* pHints) {
534   bool bExists = false;
535   std::unique_ptr<CPDF_Object> pPages =
536       GetObject(m_PagesObjNum, pHints, &bExists);
537   if (!bExists) {
538     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
539     return true;
540   }
541 
542   if (!pPages) {
543     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
544       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
545       return true;
546     }
547     return false;
548   }
549 
550   if (!GetPageKids(m_pCurrentParser, pPages.get())) {
551     m_docStatus = PDF_DATAAVAIL_ERROR;
552     return false;
553   }
554 
555   m_docStatus = PDF_DATAAVAIL_PAGE;
556   return true;
557 }
558 
CheckHeader(DownloadHints * pHints)559 bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) {
560   ASSERT(m_dwFileLen >= 0);
561   const uint32_t kReqSize = std::min(static_cast<uint32_t>(m_dwFileLen), 1024U);
562 
563   if (m_pFileAvail->IsDataAvail(0, kReqSize)) {
564     uint8_t buffer[1024];
565     m_pFileRead->ReadBlock(buffer, 0, kReqSize);
566 
567     if (IsLinearizedFile(buffer, kReqSize)) {
568       m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
569     } else {
570       if (m_docStatus == PDF_DATAAVAIL_ERROR)
571         return false;
572       m_docStatus = PDF_DATAAVAIL_END;
573     }
574     return true;
575   }
576 
577   pHints->AddSegment(0, kReqSize);
578   return false;
579 }
580 
CheckFirstPage(DownloadHints * pHints)581 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
582   if (!m_pLinearized->GetFirstPageEndOffset() ||
583       !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
584     m_docStatus = PDF_DATAAVAIL_ERROR;
585     return false;
586   }
587 
588   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
589   dwEnd += 512;
590   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
591     dwEnd = (uint32_t)m_dwFileLen;
592 
593   int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
594   int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
595   if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
596     pHints->AddSegment(iStartPos, iSize);
597     return false;
598   }
599 
600   m_docStatus =
601       m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
602   return true;
603 }
604 
IsDataAvail(FX_FILESIZE offset,uint32_t size,DownloadHints * pHints)605 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
606                                  uint32_t size,
607                                  DownloadHints* pHints) {
608   if (offset < 0 || offset > m_dwFileLen)
609     return true;
610 
611   FX_SAFE_FILESIZE safeSize = offset;
612   safeSize += size;
613   safeSize += 512;
614   if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
615     size = m_dwFileLen - offset;
616   else
617     size += 512;
618 
619   if (!m_pFileAvail->IsDataAvail(offset, size)) {
620     if (pHints)
621       pHints->AddSegment(offset, size);
622     return false;
623   }
624   return true;
625 }
626 
CheckHintTables(DownloadHints * pHints)627 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
628   if (m_pLinearized->GetPageCount() <= 1) {
629     m_docStatus = PDF_DATAAVAIL_DONE;
630     return true;
631   }
632   if (!m_pLinearized->HasHintTable()) {
633     m_docStatus = PDF_DATAAVAIL_ERROR;
634     return false;
635   }
636 
637   FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
638   FX_FILESIZE szHintLength = m_pLinearized->GetHintLength();
639 
640   if (!IsDataAvail(szHintStart, szHintLength, pHints))
641     return false;
642 
643   m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
644 
645   std::unique_ptr<CPDF_HintTables> pHintTables(
646       new CPDF_HintTables(this, m_pLinearized.get()));
647   std::unique_ptr<CPDF_Object> pHintStream(
648       ParseIndirectObjectAt(szHintStart, 0));
649   CPDF_Stream* pStream = ToStream(pHintStream.get());
650   if (pStream && pHintTables->LoadHintStream(pStream))
651     m_pHintTables = std::move(pHintTables);
652 
653   m_docStatus = PDF_DATAAVAIL_DONE;
654   return true;
655 }
656 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)657 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
658     FX_FILESIZE pos,
659     uint32_t objnum,
660     CPDF_IndirectObjectHolder* pObjList) {
661   FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
662   m_syntaxParser.RestorePos(pos);
663 
664   bool bIsNumber;
665   CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber);
666   if (!bIsNumber)
667     return nullptr;
668 
669   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
670   if (objnum && parser_objnum != objnum)
671     return nullptr;
672 
673   word = m_syntaxParser.GetNextWord(&bIsNumber);
674   if (!bIsNumber)
675     return nullptr;
676 
677   uint32_t gennum = FXSYS_atoui(word.c_str());
678   if (m_syntaxParser.GetKeyword() != "obj") {
679     m_syntaxParser.RestorePos(SavedPos);
680     return nullptr;
681   }
682 
683   std::unique_ptr<CPDF_Object> pObj =
684       m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true);
685   m_syntaxParser.RestorePos(SavedPos);
686   return pObj;
687 }
688 
IsLinearizedPDF()689 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
690   const uint32_t kReqSize = 1024;
691   if (!m_pFileAvail->IsDataAvail(0, kReqSize))
692     return LinearizationUnknown;
693 
694   if (!m_pFileRead)
695     return NotLinearized;
696 
697   FX_FILESIZE dwSize = m_pFileRead->GetSize();
698   if (dwSize < (FX_FILESIZE)kReqSize)
699     return LinearizationUnknown;
700 
701   uint8_t buffer[1024];
702   m_pFileRead->ReadBlock(buffer, 0, kReqSize);
703   if (IsLinearizedFile(buffer, kReqSize))
704     return Linearized;
705 
706   return NotLinearized;
707 }
708 
IsLinearized()709 bool CPDF_DataAvail::IsLinearized() {
710   return !!m_pLinearized;
711 }
712 
IsLinearizedFile(uint8_t * pData,uint32_t dwLen)713 bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
714   if (m_pLinearized)
715     return true;
716 
717   CFX_RetainPtr<IFX_MemoryStream> file =
718       IFX_MemoryStream::Create(pData, (size_t)dwLen, false);
719   int32_t offset = GetHeaderOffset(file);
720   if (offset == -1) {
721     m_docStatus = PDF_DATAAVAIL_ERROR;
722     return false;
723   }
724 
725   m_dwHeaderOffset = offset;
726   m_syntaxParser.InitParser(file, offset);
727   m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
728 
729   bool bNumber;
730   CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
731   if (!bNumber)
732     return false;
733 
734   uint32_t objnum = FXSYS_atoui(wordObjNum.c_str());
735   m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
736       ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum));
737   if (!m_pLinearized ||
738       m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) {
739     m_pLinearized.reset();
740     return false;
741   }
742   return true;
743 }
744 
CheckEnd(DownloadHints * pHints)745 bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) {
746   uint32_t req_pos = (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
747   uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos);
748 
749   if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
750     uint8_t buffer[1024];
751     m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
752 
753     CFX_RetainPtr<IFX_MemoryStream> file =
754         IFX_MemoryStream::Create(buffer, (size_t)dwSize, false);
755     m_syntaxParser.InitParser(file, 0);
756     m_syntaxParser.RestorePos(dwSize - 1);
757 
758     if (m_syntaxParser.SearchWord("startxref", true, false, dwSize)) {
759       m_syntaxParser.GetNextWord(nullptr);
760 
761       bool bNumber;
762       CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
763       if (!bNumber) {
764         m_docStatus = PDF_DATAAVAIL_ERROR;
765         return false;
766       }
767 
768       m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
769       if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
770         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
771         return true;
772       }
773 
774       m_dwLastXRefOffset = m_dwXRefOffset;
775       SetStartOffset(m_dwXRefOffset);
776       m_docStatus = PDF_DATAAVAIL_CROSSREF;
777       return true;
778     }
779 
780     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
781     return true;
782   }
783 
784   pHints->AddSegment(req_pos, dwSize);
785   return false;
786 }
787 
CheckCrossRefStream(DownloadHints * pHints,FX_FILESIZE & xref_offset)788 int32_t CPDF_DataAvail::CheckCrossRefStream(DownloadHints* pHints,
789                                             FX_FILESIZE& xref_offset) {
790   xref_offset = 0;
791   uint32_t req_size =
792       (uint32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
793 
794   if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
795     int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
796     CFX_BinaryBuf buf(iSize);
797     uint8_t* pBuf = buf.GetBuffer();
798 
799     m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
800 
801     CFX_RetainPtr<IFX_MemoryStream> file =
802         IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
803     m_parser.m_pSyntax->InitParser(file, 0);
804 
805     bool bNumber;
806     CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);
807     if (!bNumber)
808       return -1;
809 
810     uint32_t objNum = FXSYS_atoui(objnum.c_str());
811     std::unique_ptr<CPDF_Object> pObj =
812         m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);
813 
814     if (!pObj) {
815       m_Pos += m_parser.m_pSyntax->SavePos();
816       return 0;
817     }
818 
819     CPDF_Dictionary* pDict = pObj->GetDict();
820     CPDF_Name* pName = ToName(pDict ? pDict->GetObjectFor("Type") : nullptr);
821     if (pName && pName->GetString() == "XRef") {
822       m_Pos += m_parser.m_pSyntax->SavePos();
823       xref_offset = pObj->GetDict()->GetIntegerFor("Prev");
824       return 1;
825     }
826     return -1;
827   }
828   pHints->AddSegment(m_Pos, req_size);
829   return 0;
830 }
831 
SetStartOffset(FX_FILESIZE dwOffset)832 void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
833   m_Pos = dwOffset;
834 }
835 
GetNextToken(CFX_ByteString & token)836 bool CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
837   uint8_t ch;
838   if (!GetNextChar(ch))
839     return false;
840 
841   while (1) {
842     while (PDFCharIsWhitespace(ch)) {
843       if (!GetNextChar(ch))
844         return false;
845     }
846 
847     if (ch != '%')
848       break;
849 
850     while (1) {
851       if (!GetNextChar(ch))
852         return false;
853       if (PDFCharIsLineEnding(ch))
854         break;
855     }
856   }
857 
858   uint8_t buffer[256];
859   uint32_t index = 0;
860   if (PDFCharIsDelimiter(ch)) {
861     buffer[index++] = ch;
862     if (ch == '/') {
863       while (1) {
864         if (!GetNextChar(ch))
865           return false;
866 
867         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
868           m_Pos--;
869           CFX_ByteString ret(buffer, index);
870           token = ret;
871           return true;
872         }
873 
874         if (index < sizeof(buffer))
875           buffer[index++] = ch;
876       }
877     } else if (ch == '<') {
878       if (!GetNextChar(ch))
879         return false;
880 
881       if (ch == '<')
882         buffer[index++] = ch;
883       else
884         m_Pos--;
885     } else if (ch == '>') {
886       if (!GetNextChar(ch))
887         return false;
888 
889       if (ch == '>')
890         buffer[index++] = ch;
891       else
892         m_Pos--;
893     }
894 
895     CFX_ByteString ret(buffer, index);
896     token = ret;
897     return true;
898   }
899 
900   while (1) {
901     if (index < sizeof(buffer))
902       buffer[index++] = ch;
903 
904     if (!GetNextChar(ch))
905       return false;
906 
907     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
908       m_Pos--;
909       break;
910     }
911   }
912 
913   token = CFX_ByteString(buffer, index);
914   return true;
915 }
916 
GetNextChar(uint8_t & ch)917 bool CPDF_DataAvail::GetNextChar(uint8_t& ch) {
918   FX_FILESIZE pos = m_Pos;
919   if (pos >= m_dwFileLen)
920     return false;
921 
922   if (m_bufferOffset >= pos ||
923       (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
924     FX_FILESIZE read_pos = pos;
925     uint32_t read_size = 512;
926     if ((FX_FILESIZE)read_size > m_dwFileLen)
927       read_size = (uint32_t)m_dwFileLen;
928 
929     if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen)
930       read_pos = m_dwFileLen - read_size;
931 
932     if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size))
933       return false;
934 
935     m_bufferOffset = read_pos;
936     m_bufferSize = read_size;
937   }
938   ch = m_bufferData[pos - m_bufferOffset];
939   m_Pos++;
940   return true;
941 }
942 
CheckCrossRefItem(DownloadHints * pHints)943 bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) {
944   int32_t iSize = 0;
945   CFX_ByteString token;
946   while (1) {
947     if (!GetNextToken(token)) {
948       iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
949       pHints->AddSegment(m_Pos, iSize);
950       return false;
951     }
952 
953     if (token == "trailer") {
954       m_dwTrailerOffset = m_Pos;
955       m_docStatus = PDF_DATAAVAIL_TRAILER;
956       return true;
957     }
958   }
959 }
960 
CheckAllCrossRefStream(DownloadHints * pHints)961 bool CPDF_DataAvail::CheckAllCrossRefStream(DownloadHints* pHints) {
962   FX_FILESIZE xref_offset = 0;
963 
964   int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
965   if (nRet == 1) {
966     if (!xref_offset) {
967       m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
968     } else {
969       m_dwCurrentXRefSteam = xref_offset;
970       m_Pos = xref_offset;
971     }
972     return true;
973   }
974 
975   if (nRet == -1)
976     m_docStatus = PDF_DATAAVAIL_ERROR;
977   return false;
978 }
979 
CheckCrossRef(DownloadHints * pHints)980 bool CPDF_DataAvail::CheckCrossRef(DownloadHints* pHints) {
981   int32_t iSize = 0;
982   CFX_ByteString token;
983   if (!GetNextToken(token)) {
984     iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
985     pHints->AddSegment(m_Pos, iSize);
986     return false;
987   }
988 
989   if (token == "xref") {
990     while (1) {
991       if (!GetNextToken(token)) {
992         iSize =
993             (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
994         pHints->AddSegment(m_Pos, iSize);
995         m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
996         return false;
997       }
998 
999       if (token == "trailer") {
1000         m_dwTrailerOffset = m_Pos;
1001         m_docStatus = PDF_DATAAVAIL_TRAILER;
1002         return true;
1003       }
1004     }
1005   } else {
1006     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1007     return true;
1008   }
1009   return false;
1010 }
1011 
CheckTrailerAppend(DownloadHints * pHints)1012 bool CPDF_DataAvail::CheckTrailerAppend(DownloadHints* pHints) {
1013   if (m_Pos < m_dwFileLen) {
1014     FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
1015     int32_t iSize = (int32_t)(
1016         dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
1017 
1018     if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
1019       pHints->AddSegment(dwAppendPos, iSize);
1020       return false;
1021     }
1022   }
1023 
1024   if (m_dwPrevXRefOffset) {
1025     SetStartOffset(m_dwPrevXRefOffset);
1026     m_docStatus = PDF_DATAAVAIL_CROSSREF;
1027   } else {
1028     m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
1029   }
1030   return true;
1031 }
1032 
CheckTrailer(DownloadHints * pHints)1033 bool CPDF_DataAvail::CheckTrailer(DownloadHints* pHints) {
1034   int32_t iTrailerSize =
1035       (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
1036   if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
1037     int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
1038     CFX_BinaryBuf buf(iSize);
1039     uint8_t* pBuf = buf.GetBuffer();
1040     if (!pBuf) {
1041       m_docStatus = PDF_DATAAVAIL_ERROR;
1042       return false;
1043     }
1044 
1045     if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize))
1046       return false;
1047 
1048     CFX_RetainPtr<IFX_MemoryStream> file =
1049         IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
1050     m_syntaxParser.InitParser(file, 0);
1051 
1052     std::unique_ptr<CPDF_Object> pTrailer(
1053         m_syntaxParser.GetObject(nullptr, 0, 0, true));
1054     if (!pTrailer) {
1055       m_Pos += m_syntaxParser.SavePos();
1056       pHints->AddSegment(m_Pos, iTrailerSize);
1057       return false;
1058     }
1059 
1060     if (!pTrailer->IsDictionary())
1061       return false;
1062 
1063     CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
1064     CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt");
1065     if (ToReference(pEncrypt)) {
1066       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1067       return true;
1068     }
1069 
1070     uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev");
1071     if (xrefpos) {
1072       m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
1073       if (m_dwPrevXRefOffset) {
1074         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1075       } else {
1076         m_dwPrevXRefOffset = xrefpos;
1077         if (m_dwPrevXRefOffset >= m_dwFileLen) {
1078           m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1079         } else {
1080           SetStartOffset(m_dwPrevXRefOffset);
1081           m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
1082         }
1083       }
1084       return true;
1085     }
1086     m_dwPrevXRefOffset = 0;
1087     m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
1088     return true;
1089   }
1090   pHints->AddSegment(m_Pos, iTrailerSize);
1091   return false;
1092 }
1093 
CheckPage(uint32_t dwPage,DownloadHints * pHints)1094 bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) {
1095   while (true) {
1096     switch (m_docStatus) {
1097       case PDF_DATAAVAIL_PAGETREE:
1098         if (!LoadDocPages(pHints))
1099           return false;
1100         break;
1101       case PDF_DATAAVAIL_PAGE:
1102         if (!LoadDocPage(dwPage, pHints))
1103           return false;
1104         break;
1105       case PDF_DATAAVAIL_ERROR:
1106         return LoadAllFile(pHints);
1107       default:
1108         m_bPagesTreeLoad = true;
1109         m_bPagesLoad = true;
1110         m_bCurPageDictLoadOK = true;
1111         m_docStatus = PDF_DATAAVAIL_PAGE;
1112         return true;
1113     }
1114   }
1115 }
1116 
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode,DownloadHints * pHints)1117 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
1118                                         PageNode* pPageNode,
1119                                         DownloadHints* pHints) {
1120   bool bExists = false;
1121   std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, pHints, &bExists);
1122   if (!bExists) {
1123     m_docStatus = PDF_DATAAVAIL_ERROR;
1124     return false;
1125   }
1126 
1127   if (!pPages)
1128     return false;
1129 
1130   CPDF_Array* pArray = pPages->AsArray();
1131   if (!pArray) {
1132     m_docStatus = PDF_DATAAVAIL_ERROR;
1133     return false;
1134   }
1135 
1136   pPageNode->m_type = PDF_PAGENODE_PAGES;
1137   for (size_t i = 0; i < pArray->GetCount(); ++i) {
1138     CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
1139     if (!pKid)
1140       continue;
1141 
1142     auto pNode = pdfium::MakeUnique<PageNode>();
1143     pNode->m_dwPageNo = pKid->GetRefObjNum();
1144     pPageNode->m_ChildNodes.push_back(std::move(pNode));
1145   }
1146   return true;
1147 }
1148 
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode,DownloadHints * pHints)1149 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
1150                                           PageNode* pPageNode,
1151                                           DownloadHints* pHints) {
1152   bool bExists = false;
1153   std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, pHints, &bExists);
1154   if (!bExists) {
1155     m_docStatus = PDF_DATAAVAIL_ERROR;
1156     return false;
1157   }
1158 
1159   if (!pPage)
1160     return false;
1161 
1162   if (pPage->IsArray()) {
1163     pPageNode->m_dwPageNo = dwPageNo;
1164     pPageNode->m_type = PDF_PAGENODE_ARRAY;
1165     return true;
1166   }
1167 
1168   if (!pPage->IsDictionary()) {
1169     m_docStatus = PDF_DATAAVAIL_ERROR;
1170     return false;
1171   }
1172 
1173   pPageNode->m_dwPageNo = dwPageNo;
1174   CPDF_Dictionary* pDict = pPage->GetDict();
1175   CFX_ByteString type = pDict->GetStringFor("Type");
1176   if (type == "Pages") {
1177     pPageNode->m_type = PDF_PAGENODE_PAGES;
1178     CPDF_Object* pKids = pDict->GetObjectFor("Kids");
1179     if (!pKids) {
1180       m_docStatus = PDF_DATAAVAIL_PAGE;
1181       return true;
1182     }
1183 
1184     switch (pKids->GetType()) {
1185       case CPDF_Object::REFERENCE: {
1186         CPDF_Reference* pKid = pKids->AsReference();
1187         auto pNode = pdfium::MakeUnique<PageNode>();
1188         pNode->m_dwPageNo = pKid->GetRefObjNum();
1189         pPageNode->m_ChildNodes.push_back(std::move(pNode));
1190       } break;
1191       case CPDF_Object::ARRAY: {
1192         CPDF_Array* pKidsArray = pKids->AsArray();
1193         for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
1194           CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
1195           if (!pKid)
1196             continue;
1197 
1198           auto pNode = pdfium::MakeUnique<PageNode>();
1199           pNode->m_dwPageNo = pKid->GetRefObjNum();
1200           pPageNode->m_ChildNodes.push_back(std::move(pNode));
1201         }
1202       } break;
1203       default:
1204         break;
1205     }
1206   } else if (type == "Page") {
1207     pPageNode->m_type = PDF_PAGENODE_PAGE;
1208   } else {
1209     m_docStatus = PDF_DATAAVAIL_ERROR;
1210     return false;
1211   }
1212   return true;
1213 }
1214 
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,DownloadHints * pHints,int level)1215 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
1216                                    int32_t iPage,
1217                                    int32_t& iCount,
1218                                    DownloadHints* pHints,
1219                                    int level) {
1220   if (level >= kMaxPageRecursionDepth)
1221     return false;
1222 
1223   int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
1224   if (iSize <= 0 || iPage >= iSize) {
1225     m_docStatus = PDF_DATAAVAIL_ERROR;
1226     return false;
1227   }
1228   for (int32_t i = 0; i < iSize; ++i) {
1229     PageNode* pNode = pageNode.m_ChildNodes[i].get();
1230     if (!pNode)
1231       continue;
1232 
1233     if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
1234       // Updates the type for the unknown page node.
1235       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode, pHints))
1236         return false;
1237     }
1238     if (pNode->m_type == PDF_PAGENODE_ARRAY) {
1239       // Updates a more specific type for the array page node.
1240       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints))
1241         return false;
1242     }
1243     switch (pNode->m_type) {
1244       case PDF_PAGENODE_PAGE:
1245         iCount++;
1246         if (iPage == iCount && m_pDocument)
1247           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
1248         break;
1249       case PDF_PAGENODE_PAGES:
1250         if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1))
1251           return false;
1252         break;
1253       case PDF_PAGENODE_UNKNOWN:
1254       case PDF_PAGENODE_ARRAY:
1255         // Already converted above, error if we get here.
1256         return false;
1257     }
1258     if (iPage == iCount) {
1259       m_docStatus = PDF_DATAAVAIL_DONE;
1260       return true;
1261     }
1262   }
1263   return true;
1264 }
1265 
LoadDocPage(uint32_t dwPage,DownloadHints * pHints)1266 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) {
1267   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1268   int32_t iPage = safePage.ValueOrDie();
1269   if (m_pDocument->GetPageCount() <= iPage ||
1270       m_pDocument->IsPageLoaded(iPage)) {
1271     m_docStatus = PDF_DATAAVAIL_DONE;
1272     return true;
1273   }
1274   if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
1275     m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
1276     return true;
1277   }
1278   int32_t iCount = -1;
1279   return CheckPageNode(m_PageNode, iPage, iCount, pHints, 0);
1280 }
1281 
CheckPageCount(DownloadHints * pHints)1282 bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) {
1283   bool bExists = false;
1284   std::unique_ptr<CPDF_Object> pPages =
1285       GetObject(m_PagesObjNum, pHints, &bExists);
1286   if (!bExists) {
1287     m_docStatus = PDF_DATAAVAIL_ERROR;
1288     return false;
1289   }
1290   if (!pPages)
1291     return false;
1292 
1293   CPDF_Dictionary* pPagesDict = pPages->GetDict();
1294   if (!pPagesDict) {
1295     m_docStatus = PDF_DATAAVAIL_ERROR;
1296     return false;
1297   }
1298   if (!pPagesDict->KeyExist("Kids"))
1299     return true;
1300 
1301   return pPagesDict->GetIntegerFor("Count") > 0;
1302 }
1303 
LoadDocPages(DownloadHints * pHints)1304 bool CPDF_DataAvail::LoadDocPages(DownloadHints* pHints) {
1305   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode, pHints))
1306     return false;
1307 
1308   if (CheckPageCount(pHints)) {
1309     m_docStatus = PDF_DATAAVAIL_PAGE;
1310     return true;
1311   }
1312 
1313   m_bTotalLoadPageTree = true;
1314   return false;
1315 }
1316 
LoadPages(DownloadHints * pHints)1317 bool CPDF_DataAvail::LoadPages(DownloadHints* pHints) {
1318   while (!m_bPagesTreeLoad) {
1319     if (!CheckPageStatus(pHints))
1320       return false;
1321   }
1322 
1323   if (m_bPagesLoad)
1324     return true;
1325 
1326   m_pDocument->LoadPages();
1327   return false;
1328 }
1329 
CheckLinearizedData(DownloadHints * pHints)1330 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
1331     DownloadHints* pHints) {
1332   if (m_bLinearedDataOK)
1333     return DataAvailable;
1334   ASSERT(m_pLinearized);
1335   if (!m_pLinearized->GetLastXRefOffset())
1336     return DataError;
1337 
1338   if (!m_bMainXRefLoadTried) {
1339     FX_SAFE_UINT32 data_size = m_dwFileLen;
1340     data_size -= m_pLinearized->GetLastXRefOffset();
1341     if (!data_size.IsValid())
1342       return DataError;
1343 
1344     if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
1345                                    data_size.ValueOrDie())) {
1346       pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
1347                          data_size.ValueOrDie());
1348       return DataNotAvailable;
1349     }
1350 
1351     CPDF_Parser::Error eRet =
1352         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
1353     m_bMainXRefLoadTried = true;
1354     if (eRet != CPDF_Parser::SUCCESS)
1355       return DataError;
1356 
1357     if (!PreparePageItem())
1358       return DataNotAvailable;
1359 
1360     m_bMainXRefLoadedOK = true;
1361     m_bLinearedDataOK = true;
1362   }
1363 
1364   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
1365 }
1366 
CheckPageAnnots(uint32_t dwPage,DownloadHints * pHints)1367 bool CPDF_DataAvail::CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints) {
1368   if (m_objs_array.empty()) {
1369     m_ObjectSet.clear();
1370 
1371     FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1372     CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1373     if (!pPageDict)
1374       return true;
1375 
1376     CPDF_Object* pAnnots = pPageDict->GetObjectFor("Annots");
1377     if (!pAnnots)
1378       return true;
1379 
1380     std::vector<CPDF_Object*> obj_array;
1381     obj_array.push_back(pAnnots);
1382     if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
1383       return false;
1384 
1385     m_objs_array.clear();
1386     return true;
1387   }
1388 
1389   std::vector<CPDF_Object*> new_objs_array;
1390   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1391     m_objs_array = new_objs_array;
1392     return false;
1393   }
1394   m_objs_array.clear();
1395   return true;
1396 }
1397 
CheckLinearizedFirstPage(uint32_t dwPage,DownloadHints * pHints)1398 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
1399     uint32_t dwPage,
1400     DownloadHints* pHints) {
1401   if (!m_bAnnotsLoad) {
1402     if (!CheckPageAnnots(dwPage, pHints))
1403       return DataNotAvailable;
1404     m_bAnnotsLoad = true;
1405   }
1406   const bool is_page_valid = ValidatePage(dwPage);
1407   (void)is_page_valid;
1408   ASSERT(is_page_valid);
1409   return DataAvailable;
1410 }
1411 
HaveResourceAncestor(CPDF_Dictionary * pDict)1412 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
1413   CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
1414   if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
1415     return false;
1416 
1417   CPDF_Object* pParent = pDict->GetObjectFor("Parent");
1418   if (!pParent)
1419     return false;
1420 
1421   CPDF_Dictionary* pParentDict = pParent->GetDict();
1422   if (!pParentDict)
1423     return false;
1424 
1425   CPDF_Object* pRet = pParentDict->GetObjectFor("Resources");
1426   if (pRet) {
1427     m_pPageResource = pRet;
1428     return true;
1429   }
1430 
1431   return HaveResourceAncestor(pParentDict);
1432 }
1433 
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)1434 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
1435     uint32_t dwPage,
1436     DownloadHints* pHints) {
1437   if (!m_pDocument)
1438     return DataError;
1439 
1440   if (IsFirstCheck(dwPage)) {
1441     m_bCurPageDictLoadOK = false;
1442     m_bPageLoadedOK = false;
1443     m_bAnnotsLoad = false;
1444     m_bNeedDownLoadResource = false;
1445     m_objs_array.clear();
1446     m_ObjectSet.clear();
1447   }
1448 
1449   if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
1450     return DataAvailable;
1451 
1452   if (m_pLinearized) {
1453     if (dwPage == m_pLinearized->GetFirstPageNo()) {
1454       DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints);
1455       if (nRet == DataAvailable)
1456         m_pagesLoadState.insert(dwPage);
1457       return nRet;
1458     }
1459 
1460     DocAvailStatus nResult = CheckLinearizedData(pHints);
1461     if (nResult != DataAvailable)
1462       return nResult;
1463 
1464     if (m_pHintTables) {
1465       nResult = m_pHintTables->CheckPage(dwPage, pHints);
1466       if (nResult != DataAvailable)
1467         return nResult;
1468       m_pagesLoadState.insert(dwPage);
1469       return GetPage(dwPage) ? DataAvailable : DataError;
1470     }
1471 
1472     if (m_bMainXRefLoadedOK) {
1473       if (m_bTotalLoadPageTree) {
1474         if (!LoadPages(pHints))
1475           return DataNotAvailable;
1476       } else {
1477         if (!m_bCurPageDictLoadOK && !CheckPage(dwPage, pHints))
1478           return DataNotAvailable;
1479       }
1480     } else {
1481       if (!LoadAllFile(pHints))
1482         return DataNotAvailable;
1483       m_pDocument->GetParser()->RebuildCrossRef();
1484       ResetFirstCheck(dwPage);
1485       return DataAvailable;
1486     }
1487   } else {
1488     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK &&
1489         !CheckPage(dwPage, pHints)) {
1490       return DataNotAvailable;
1491     }
1492   }
1493 
1494   if (m_bHaveAcroForm && !m_bAcroFormLoad) {
1495     if (!CheckAcroFormSubObject(pHints))
1496       return DataNotAvailable;
1497     m_bAcroFormLoad = true;
1498   }
1499 
1500   if (!m_bPageLoadedOK) {
1501     if (m_objs_array.empty()) {
1502       m_ObjectSet.clear();
1503 
1504       FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1505       m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1506       if (!m_pPageDict) {
1507         ResetFirstCheck(dwPage);
1508         // This is XFA page.
1509         return DataAvailable;
1510       }
1511 
1512       std::vector<CPDF_Object*> obj_array;
1513       obj_array.push_back(m_pPageDict);
1514       if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1515         return DataNotAvailable;
1516 
1517       m_objs_array.clear();
1518     } else {
1519       std::vector<CPDF_Object*> new_objs_array;
1520       if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1521         m_objs_array = new_objs_array;
1522         return DataNotAvailable;
1523       }
1524     }
1525     m_objs_array.clear();
1526     m_bPageLoadedOK = true;
1527   }
1528 
1529   if (!m_bAnnotsLoad) {
1530     if (!CheckPageAnnots(dwPage, pHints))
1531       return DataNotAvailable;
1532     m_bAnnotsLoad = true;
1533   }
1534 
1535   if (m_pPageDict && !m_bNeedDownLoadResource) {
1536     m_pPageResource = m_pPageDict->GetObjectFor("Resources");
1537     m_bNeedDownLoadResource =
1538         m_pPageResource || HaveResourceAncestor(m_pPageDict);
1539   }
1540 
1541   if (m_bNeedDownLoadResource) {
1542     if (!CheckResources(pHints))
1543       return DataNotAvailable;
1544     m_bNeedDownLoadResource = false;
1545   }
1546 
1547   m_bPageLoadedOK = false;
1548   m_bAnnotsLoad = false;
1549   m_bCurPageDictLoadOK = false;
1550 
1551   ResetFirstCheck(dwPage);
1552   m_pagesLoadState.insert(dwPage);
1553   const bool is_page_valid = ValidatePage(dwPage);
1554   (void)is_page_valid;
1555   ASSERT(is_page_valid);
1556   return DataAvailable;
1557 }
1558 
CheckResources(DownloadHints * pHints)1559 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
1560   if (m_objs_array.empty()) {
1561     std::vector<CPDF_Object*> obj_array;
1562     obj_array.push_back(m_pPageResource);
1563     if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1564       return false;
1565 
1566     m_objs_array.clear();
1567     return true;
1568   }
1569   std::vector<CPDF_Object*> new_objs_array;
1570   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1571     m_objs_array = new_objs_array;
1572     return false;
1573   }
1574   m_objs_array.clear();
1575   return true;
1576 }
1577 
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,uint32_t * pSize)1578 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
1579                                                uint32_t* pSize) {
1580   if (pPos)
1581     *pPos = m_dwLastXRefOffset;
1582   if (pSize)
1583     *pSize = (uint32_t)(m_dwFileLen - m_dwLastXRefOffset);
1584 }
1585 
GetPageCount() const1586 int CPDF_DataAvail::GetPageCount() const {
1587   if (m_pLinearized)
1588     return m_pLinearized->GetPageCount();
1589   return m_pDocument ? m_pDocument->GetPageCount() : 0;
1590 }
1591 
GetPage(int index)1592 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
1593   if (!m_pDocument || index < 0 || index >= GetPageCount())
1594     return nullptr;
1595   CPDF_Dictionary* page = m_pDocument->GetPage(index);
1596   if (page)
1597     return page;
1598   if (!m_pLinearized || !m_pHintTables)
1599     return nullptr;
1600 
1601   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
1602     return nullptr;
1603   FX_FILESIZE szPageStartPos = 0;
1604   FX_FILESIZE szPageLength = 0;
1605   uint32_t dwObjNum = 0;
1606   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
1607                                                      &szPageLength, &dwObjNum);
1608   if (!bPagePosGot || !dwObjNum)
1609     return nullptr;
1610   // We should say to the document, which object is the page.
1611   m_pDocument->SetPageObjNum(index, dwObjNum);
1612   // Page object already can be parsed in document.
1613   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
1614     m_syntaxParser.InitParser(
1615         m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
1616     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
1617         dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
1618   }
1619   const bool is_page_valid = ValidatePage(index);
1620   (void)is_page_valid;
1621   ASSERT(is_page_valid);
1622   return m_pDocument->GetPage(index);
1623 }
1624 
IsFormAvail(DownloadHints * pHints)1625 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
1626     DownloadHints* pHints) {
1627   if (!m_pDocument)
1628     return FormAvailable;
1629   if (m_pLinearized) {
1630     DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
1631     if (nDocStatus == DataError)
1632       return FormError;
1633     if (nDocStatus == DataNotAvailable)
1634       return FormNotAvailable;
1635   }
1636 
1637   if (!m_bLinearizedFormParamLoad) {
1638     CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1639     if (!pRoot)
1640       return FormAvailable;
1641 
1642     CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1643     if (!pAcroForm)
1644       return FormNotExist;
1645 
1646     m_objs_array.push_back(pAcroForm->GetDict());
1647     m_bLinearizedFormParamLoad = true;
1648   }
1649 
1650   std::vector<CPDF_Object*> new_objs_array;
1651   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1652     m_objs_array = new_objs_array;
1653     return FormNotAvailable;
1654   }
1655 
1656   m_objs_array.clear();
1657   const bool is_form_valid = ValidateForm();
1658   (void)is_form_valid;
1659   ASSERT(is_form_valid);
1660   return FormAvailable;
1661 }
1662 
ValidatePage(uint32_t dwPage)1663 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1664   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1665   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1666   if (!pPageDict)
1667     return false;
1668   std::vector<CPDF_Object*> obj_array;
1669   obj_array.push_back(pPageDict);
1670   std::vector<CPDF_Object*> dummy;
1671   return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1672 }
1673 
ValidateForm()1674 bool CPDF_DataAvail::ValidateForm() {
1675   CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1676   if (!pRoot)
1677     return true;
1678   CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1679   if (!pAcroForm)
1680     return false;
1681   std::vector<CPDF_Object*> obj_array;
1682   obj_array.push_back(pAcroForm);
1683   std::vector<CPDF_Object*> dummy;
1684   return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1685 }
1686 
PageNode()1687 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1688 
~PageNode()1689 CPDF_DataAvail::PageNode::~PageNode() {}
1690