• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2016 PDFium Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style license that can be
3  // found in the LICENSE file.
4  
5  // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6  
7  #include "core/fpdfapi/parser/cpdf_data_avail.h"
8  
9  #include <algorithm>
10  #include <memory>
11  #include <utility>
12  
13  #include "core/fpdfapi/cpdf_modulemgr.h"
14  #include "core/fpdfapi/parser/cpdf_array.h"
15  #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
16  #include "core/fpdfapi/parser/cpdf_dictionary.h"
17  #include "core/fpdfapi/parser/cpdf_document.h"
18  #include "core/fpdfapi/parser/cpdf_hint_tables.h"
19  #include "core/fpdfapi/parser/cpdf_linearized_header.h"
20  #include "core/fpdfapi/parser/cpdf_name.h"
21  #include "core/fpdfapi/parser/cpdf_number.h"
22  #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
23  #include "core/fpdfapi/parser/cpdf_read_validator.h"
24  #include "core/fpdfapi/parser/cpdf_reference.h"
25  #include "core/fpdfapi/parser/cpdf_stream.h"
26  #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27  #include "core/fxcrt/cfx_memorystream.h"
28  #include "core/fxcrt/fx_extension.h"
29  #include "core/fxcrt/fx_safe_types.h"
30  #include "third_party/base/numerics/safe_conversions.h"
31  #include "third_party/base/ptr_util.h"
32  #include "third_party/base/stl_util.h"
33  
34  namespace {
35  
36  // static
GetResourceObject(const CPDF_Dictionary * pDict)37  const CPDF_Object* GetResourceObject(const CPDF_Dictionary* pDict) {
38    constexpr size_t kMaxHierarchyDepth = 64;
39    size_t depth = 0;
40  
41    const CPDF_Dictionary* dictionary_to_check = pDict;
42    while (dictionary_to_check) {
43      const CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
44      if (result)
45        return result;
46      const CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
47      dictionary_to_check = parent ? parent->GetDict() : nullptr;
48  
49      if (++depth > kMaxHierarchyDepth) {
50        // We have cycle in parents hierarchy.
51        return nullptr;
52      }
53    }
54    return nullptr;
55  }
56  
57  class HintsScope {
58   public:
HintsScope(CPDF_ReadValidator * validator,CPDF_DataAvail::DownloadHints * hints)59    HintsScope(CPDF_ReadValidator* validator,
60               CPDF_DataAvail::DownloadHints* hints)
61        : validator_(validator) {
62      ASSERT(validator_);
63      validator_->SetDownloadHints(hints);
64    }
65  
~HintsScope()66    ~HintsScope() { validator_->SetDownloadHints(nullptr); }
67  
68   private:
69    UnownedPtr<CPDF_ReadValidator> validator_;
70  };
71  
72  }  // namespace
73  
~FileAvail()74  CPDF_DataAvail::FileAvail::~FileAvail() {}
75  
~DownloadHints()76  CPDF_DataAvail::DownloadHints::~DownloadHints() {}
77  
CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78  CPDF_DataAvail::CPDF_DataAvail(
79      FileAvail* pFileAvail,
80      const RetainPtr<IFX_SeekableReadStream>& pFileRead,
81      bool bSupportHintTable)
82      : m_pFileAvail(pFileAvail),
83        m_pFileRead(
84            pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)),
85        m_dwFileLen(m_pFileRead->GetSize()),
86        m_bSupportHintTable(bSupportHintTable) {}
87  
~CPDF_DataAvail()88  CPDF_DataAvail::~CPDF_DataAvail() {
89    m_pHintTables.reset();
90  }
91  
IsDocAvail(DownloadHints * pHints)92  CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
93      DownloadHints* pHints) {
94    if (!m_dwFileLen)
95      return DataError;
96  
97    const HintsScope hints_scope(m_pFileRead.Get(), pHints);
98  
99    while (!m_bDocAvail) {
100      if (!CheckDocStatus())
101        return DataNotAvailable;
102    }
103  
104    return DataAvailable;
105  }
106  
CheckDocStatus()107  bool CPDF_DataAvail::CheckDocStatus() {
108    switch (m_docStatus) {
109      case PDF_DATAAVAIL_HEADER:
110        return CheckHeader();
111      case PDF_DATAAVAIL_FIRSTPAGE:
112        return CheckFirstPage();
113      case PDF_DATAAVAIL_HINTTABLE:
114        return CheckHintTables();
115      case PDF_DATAAVAIL_LOADALLCROSSREF:
116        return CheckAndLoadAllXref();
117      case PDF_DATAAVAIL_LOADALLFILE:
118        return LoadAllFile();
119      case PDF_DATAAVAIL_ROOT:
120        return CheckRoot();
121      case PDF_DATAAVAIL_INFO:
122        return CheckInfo();
123      case PDF_DATAAVAIL_PAGETREE:
124        if (m_bTotalLoadPageTree)
125          return CheckPages();
126        return LoadDocPages();
127      case PDF_DATAAVAIL_PAGE:
128        if (m_bTotalLoadPageTree)
129          return CheckPage();
130        m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
131        return true;
132      case PDF_DATAAVAIL_ERROR:
133        return LoadAllFile();
134      case PDF_DATAAVAIL_PAGE_LATERLOAD:
135        m_docStatus = PDF_DATAAVAIL_PAGE;
136      default:
137        m_bDocAvail = true;
138        return true;
139    }
140  }
141  
CheckPageStatus()142  bool CPDF_DataAvail::CheckPageStatus() {
143    switch (m_docStatus) {
144      case PDF_DATAAVAIL_PAGETREE:
145        return CheckPages();
146      case PDF_DATAAVAIL_PAGE:
147        return CheckPage();
148      case PDF_DATAAVAIL_ERROR:
149        return LoadAllFile();
150      default:
151        m_bPagesTreeLoad = true;
152        m_bPagesLoad = true;
153        return true;
154    }
155  }
156  
LoadAllFile()157  bool CPDF_DataAvail::LoadAllFile() {
158    if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
159      m_docStatus = PDF_DATAAVAIL_DONE;
160      return true;
161    }
162    return false;
163  }
164  
CheckAndLoadAllXref()165  bool CPDF_DataAvail::CheckAndLoadAllXref() {
166    if (!m_pCrossRefAvail) {
167      const CPDF_ReadValidator::Session read_session(GetValidator().Get());
168      const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
169      if (GetValidator()->has_read_problems())
170        return false;
171  
172      if (last_xref_offset <= 0) {
173        m_docStatus = PDF_DATAAVAIL_ERROR;
174        return false;
175      }
176  
177      m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
178                                                                last_xref_offset);
179    }
180  
181    switch (m_pCrossRefAvail->CheckAvail()) {
182      case DocAvailStatus::DataAvailable:
183        break;
184      case DocAvailStatus::DataNotAvailable:
185        return false;
186      case DocAvailStatus::DataError:
187        m_docStatus = PDF_DATAAVAIL_ERROR;
188        return false;
189      default:
190        NOTREACHED();
191        return false;
192    }
193  
194    if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
195        !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
196      m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
197      return false;
198    }
199  
200    m_dwRootObjNum = m_parser.GetRootObjNum();
201    m_dwInfoObjNum = m_parser.GetInfoObjNum();
202    m_pCurrentParser = &m_parser;
203    m_docStatus = PDF_DATAAVAIL_ROOT;
204    return true;
205  }
206  
GetObject(uint32_t objnum,bool * pExistInFile)207  std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
208                                                         bool* pExistInFile) {
209    CPDF_Parser* pParser = nullptr;
210  
211    if (pExistInFile)
212      *pExistInFile = true;
213  
214    pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
215  
216    std::unique_ptr<CPDF_Object> pRet;
217    if (pParser) {
218      const CPDF_ReadValidator::Session read_session(GetValidator().Get());
219      pRet = pParser->ParseIndirectObject(nullptr, objnum);
220      if (GetValidator()->has_read_problems())
221        return nullptr;
222    }
223  
224    if (!pRet && pExistInFile)
225      *pExistInFile = false;
226  
227    return pRet;
228  }
229  
CheckInfo()230  bool CPDF_DataAvail::CheckInfo() {
231    bool bExist = false;
232    std::unique_ptr<CPDF_Object> pInfo = GetObject(m_dwInfoObjNum, &bExist);
233    if (bExist && !pInfo) {
234      if (m_docStatus == PDF_DATAAVAIL_ERROR) {
235        m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
236        return true;
237      }
238      return false;
239    }
240    m_docStatus = PDF_DATAAVAIL_PAGETREE;
241    return true;
242  }
243  
CheckRoot()244  bool CPDF_DataAvail::CheckRoot() {
245    bool bExist = false;
246    m_pRoot = GetObject(m_dwRootObjNum, &bExist);
247    if (!bExist) {
248      m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
249      return true;
250    }
251  
252    if (!m_pRoot) {
253      if (m_docStatus == PDF_DATAAVAIL_ERROR) {
254        m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
255        return true;
256      }
257      return false;
258    }
259  
260    CPDF_Dictionary* pDict = m_pRoot->GetDict();
261    if (!pDict) {
262      m_docStatus = PDF_DATAAVAIL_ERROR;
263      return false;
264    }
265  
266    CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
267    if (!pRef) {
268      m_docStatus = PDF_DATAAVAIL_ERROR;
269      return false;
270    }
271  
272    m_PagesObjNum = pRef->GetRefObjNum();
273  
274    m_docStatus = m_dwInfoObjNum ? PDF_DATAAVAIL_INFO : PDF_DATAAVAIL_PAGETREE;
275    return true;
276  }
277  
PreparePageItem()278  bool CPDF_DataAvail::PreparePageItem() {
279    const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
280    CPDF_Reference* pRef =
281        ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
282    if (!pRef) {
283      m_docStatus = PDF_DATAAVAIL_ERROR;
284      return false;
285    }
286  
287    m_PagesObjNum = pRef->GetRefObjNum();
288    m_pCurrentParser = m_pDocument->GetParser();
289    m_docStatus = PDF_DATAAVAIL_PAGETREE;
290    return true;
291  }
292  
IsFirstCheck(uint32_t dwPage)293  bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
294    return m_pageMapCheckState.insert(dwPage).second;
295  }
296  
ResetFirstCheck(uint32_t dwPage)297  void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
298    m_pageMapCheckState.erase(dwPage);
299  }
300  
CheckPage()301  bool CPDF_DataAvail::CheckPage() {
302    std::vector<uint32_t> UnavailObjList;
303    for (uint32_t dwPageObjNum : m_PageObjList) {
304      bool bExists = false;
305      std::unique_ptr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
306      if (!pObj) {
307        if (bExists)
308          UnavailObjList.push_back(dwPageObjNum);
309        continue;
310      }
311      CPDF_Array* pArray = ToArray(pObj.get());
312      if (pArray) {
313        for (const auto& pArrayObj : *pArray) {
314          if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
315            UnavailObjList.push_back(pRef->GetRefObjNum());
316        }
317      }
318      if (!pObj->IsDictionary())
319        continue;
320  
321      ByteString type = pObj->GetDict()->GetStringFor("Type");
322      if (type == "Pages") {
323        m_PagesArray.push_back(std::move(pObj));
324        continue;
325      }
326    }
327    m_PageObjList.clear();
328    if (!UnavailObjList.empty()) {
329      m_PageObjList = std::move(UnavailObjList);
330      return false;
331    }
332    size_t iPages = m_PagesArray.size();
333    for (size_t i = 0; i < iPages; ++i) {
334      std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
335      if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
336        m_PagesArray.clear();
337        m_docStatus = PDF_DATAAVAIL_ERROR;
338        return false;
339      }
340    }
341    m_PagesArray.clear();
342    if (m_PageObjList.empty())
343      m_docStatus = PDF_DATAAVAIL_DONE;
344  
345    return true;
346  }
347  
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)348  bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
349    if (!pParser) {
350      m_docStatus = PDF_DATAAVAIL_ERROR;
351      return false;
352    }
353  
354    CPDF_Dictionary* pDict = pPages->GetDict();
355    CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
356    if (!pKids)
357      return true;
358  
359    switch (pKids->GetType()) {
360      case CPDF_Object::REFERENCE:
361        m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
362        break;
363      case CPDF_Object::ARRAY: {
364        CPDF_Array* pKidsArray = pKids->AsArray();
365        for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
366          if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
367            m_PageObjList.push_back(pRef->GetRefObjNum());
368        }
369        break;
370      }
371      default:
372        m_docStatus = PDF_DATAAVAIL_ERROR;
373        return false;
374    }
375    return true;
376  }
377  
CheckPages()378  bool CPDF_DataAvail::CheckPages() {
379    bool bExists = false;
380    std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
381    if (!bExists) {
382      m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
383      return true;
384    }
385  
386    if (!pPages) {
387      if (m_docStatus == PDF_DATAAVAIL_ERROR) {
388        m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
389        return true;
390      }
391      return false;
392    }
393  
394    if (!GetPageKids(m_pCurrentParser, pPages.get())) {
395      m_docStatus = PDF_DATAAVAIL_ERROR;
396      return false;
397    }
398  
399    m_docStatus = PDF_DATAAVAIL_PAGE;
400    return true;
401  }
402  
CheckHeader()403  bool CPDF_DataAvail::CheckHeader() {
404    switch (CheckHeaderAndLinearized()) {
405      case DocAvailStatus::DataAvailable:
406        m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
407                                    : PDF_DATAAVAIL_LOADALLCROSSREF;
408        return true;
409      case DocAvailStatus::DataNotAvailable:
410        return false;
411      case DocAvailStatus::DataError:
412        m_docStatus = PDF_DATAAVAIL_ERROR;
413        return true;
414      default:
415        NOTREACHED();
416        return false;
417    }
418  }
419  
CheckFirstPage()420  bool CPDF_DataAvail::CheckFirstPage() {
421    if (!m_pLinearized->GetFirstPageEndOffset() ||
422        !m_pLinearized->GetFileSize() ||
423        !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
424      m_docStatus = PDF_DATAAVAIL_ERROR;
425      return false;
426    }
427  
428    uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
429    dwEnd += 512;
430    if ((FX_FILESIZE)dwEnd > m_dwFileLen)
431      dwEnd = (uint32_t)m_dwFileLen;
432  
433    const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
434    const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
435    if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
436                                                               data_size))
437      return false;
438  
439    m_docStatus =
440        m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
441    return true;
442  }
443  
CheckHintTables()444  bool CPDF_DataAvail::CheckHintTables() {
445    if (m_pLinearized->GetPageCount() <= 1) {
446      m_docStatus = PDF_DATAAVAIL_DONE;
447      return true;
448    }
449    if (!m_pLinearized->HasHintTable()) {
450      m_docStatus = PDF_DATAAVAIL_ERROR;
451      return false;
452    }
453  
454    const FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
455    const uint32_t szHintLength = m_pLinearized->GetHintLength();
456  
457    if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(szHintStart,
458                                                               szHintLength))
459      return false;
460  
461    auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(),
462                                                           m_pLinearized.get());
463    std::unique_ptr<CPDF_Object> pHintStream =
464        ParseIndirectObjectAt(szHintStart, 0);
465    CPDF_Stream* pStream = ToStream(pHintStream.get());
466    if (pStream && pHintTables->LoadHintStream(pStream))
467      m_pHintTables = std::move(pHintTables);
468  
469    m_docStatus = PDF_DATAAVAIL_DONE;
470    return true;
471  }
472  
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)473  std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
474      FX_FILESIZE pos,
475      uint32_t objnum,
476      CPDF_IndirectObjectHolder* pObjList) {
477    const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
478    GetSyntaxParser()->SetPos(pos);
479    std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
480        pObjList, CPDF_SyntaxParser::ParseType::kLoose);
481    GetSyntaxParser()->SetPos(SavedPos);
482    return (result && (!objnum || result->GetObjNum() == objnum))
483               ? std::move(result)
484               : nullptr;
485  }
486  
IsLinearizedPDF()487  CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
488    switch (CheckHeaderAndLinearized()) {
489      case DocAvailStatus::DataAvailable:
490        return m_pLinearized ? DocLinearizationStatus::Linearized
491                             : DocLinearizationStatus::NotLinearized;
492      case DocAvailStatus::DataNotAvailable:
493        return DocLinearizationStatus::LinearizationUnknown;
494      case DocAvailStatus::DataError:
495        return DocLinearizationStatus::NotLinearized;
496      default:
497        NOTREACHED();
498        return DocLinearizationStatus::LinearizationUnknown;
499    }
500  }
501  
CheckHeaderAndLinearized()502  CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
503    if (m_bHeaderAvail)
504      return DocAvailStatus::DataAvailable;
505  
506    const CPDF_ReadValidator::Session read_session(GetValidator().Get());
507    const int32_t header_offset = GetHeaderOffset(GetValidator());
508    if (GetValidator()->has_read_problems())
509      return DocAvailStatus::DataNotAvailable;
510  
511    if (header_offset == kInvalidHeaderOffset)
512      return DocAvailStatus::DataError;
513  
514    m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset);
515    m_pLinearized = m_parser.ParseLinearizedHeader();
516    if (GetValidator()->has_read_problems())
517      return DocAvailStatus::DataNotAvailable;
518  
519    m_bHeaderAvail = true;
520    return DocAvailStatus::DataAvailable;
521  }
522  
CheckPage(uint32_t dwPage)523  bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
524    while (true) {
525      switch (m_docStatus) {
526        case PDF_DATAAVAIL_PAGETREE:
527          if (!LoadDocPages())
528            return false;
529          break;
530        case PDF_DATAAVAIL_PAGE:
531          if (!LoadDocPage(dwPage))
532            return false;
533          break;
534        case PDF_DATAAVAIL_ERROR:
535          return LoadAllFile();
536        default:
537          m_bPagesTreeLoad = true;
538          m_bPagesLoad = true;
539          m_bCurPageDictLoadOK = true;
540          m_docStatus = PDF_DATAAVAIL_PAGE;
541          return true;
542      }
543    }
544  }
545  
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)546  bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
547                                          PageNode* pPageNode) {
548    bool bExists = false;
549    std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
550    if (!bExists) {
551      m_docStatus = PDF_DATAAVAIL_ERROR;
552      return false;
553    }
554  
555    if (!pPages)
556      return false;
557  
558    CPDF_Array* pArray = pPages->AsArray();
559    if (!pArray) {
560      m_docStatus = PDF_DATAAVAIL_ERROR;
561      return false;
562    }
563  
564    pPageNode->m_type = PDF_PAGENODE_PAGES;
565    for (size_t i = 0; i < pArray->GetCount(); ++i) {
566      CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
567      if (!pKid)
568        continue;
569  
570      auto pNode = pdfium::MakeUnique<PageNode>();
571      pNode->m_dwPageNo = pKid->GetRefObjNum();
572      pPageNode->m_ChildNodes.push_back(std::move(pNode));
573    }
574    return true;
575  }
576  
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)577  bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
578                                            PageNode* pPageNode) {
579    bool bExists = false;
580    std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
581    if (!bExists) {
582      m_docStatus = PDF_DATAAVAIL_ERROR;
583      return false;
584    }
585  
586    if (!pPage)
587      return false;
588  
589    if (pPage->IsArray()) {
590      pPageNode->m_dwPageNo = dwPageNo;
591      pPageNode->m_type = PDF_PAGENODE_ARRAY;
592      return true;
593    }
594  
595    if (!pPage->IsDictionary()) {
596      m_docStatus = PDF_DATAAVAIL_ERROR;
597      return false;
598    }
599  
600    pPageNode->m_dwPageNo = dwPageNo;
601    CPDF_Dictionary* pDict = pPage->GetDict();
602    const ByteString type = pDict->GetStringFor("Type");
603    if (type == "Page") {
604      pPageNode->m_type = PDF_PAGENODE_PAGE;
605      return true;
606    }
607  
608    if (type != "Pages") {
609      m_docStatus = PDF_DATAAVAIL_ERROR;
610      return false;
611    }
612  
613    pPageNode->m_type = PDF_PAGENODE_PAGES;
614    CPDF_Object* pKids = pDict->GetObjectFor("Kids");
615    if (!pKids) {
616      m_docStatus = PDF_DATAAVAIL_PAGE;
617      return true;
618    }
619  
620    switch (pKids->GetType()) {
621      case CPDF_Object::REFERENCE: {
622        CPDF_Reference* pKid = pKids->AsReference();
623        auto pNode = pdfium::MakeUnique<PageNode>();
624        pNode->m_dwPageNo = pKid->GetRefObjNum();
625        pPageNode->m_ChildNodes.push_back(std::move(pNode));
626        break;
627      }
628      case CPDF_Object::ARRAY: {
629        CPDF_Array* pKidsArray = pKids->AsArray();
630        for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
631          CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
632          if (!pKid)
633            continue;
634  
635          auto pNode = pdfium::MakeUnique<PageNode>();
636          pNode->m_dwPageNo = pKid->GetRefObjNum();
637          pPageNode->m_ChildNodes.push_back(std::move(pNode));
638        }
639        break;
640      }
641      default:
642        break;
643    }
644    return true;
645  }
646  
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)647  bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
648                                     int32_t iPage,
649                                     int32_t& iCount,
650                                     int level) {
651    if (level >= kMaxPageRecursionDepth)
652      return false;
653  
654    int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
655    if (iSize <= 0 || iPage >= iSize) {
656      m_docStatus = PDF_DATAAVAIL_ERROR;
657      return false;
658    }
659    for (int32_t i = 0; i < iSize; ++i) {
660      PageNode* pNode = pageNode.m_ChildNodes[i].get();
661      if (!pNode)
662        continue;
663  
664      if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
665        // Updates the type for the unknown page node.
666        if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
667          return false;
668      }
669      if (pNode->m_type == PDF_PAGENODE_ARRAY) {
670        // Updates a more specific type for the array page node.
671        if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
672          return false;
673      }
674      switch (pNode->m_type) {
675        case PDF_PAGENODE_PAGE:
676          iCount++;
677          if (iPage == iCount && m_pDocument)
678            m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
679          break;
680        case PDF_PAGENODE_PAGES:
681          if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
682            return false;
683          break;
684        case PDF_PAGENODE_UNKNOWN:
685        case PDF_PAGENODE_ARRAY:
686          // Already converted above, error if we get here.
687          return false;
688      }
689      if (iPage == iCount) {
690        m_docStatus = PDF_DATAAVAIL_DONE;
691        return true;
692      }
693    }
694    return true;
695  }
696  
LoadDocPage(uint32_t dwPage)697  bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
698    FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
699    int32_t iPage = safePage.ValueOrDie();
700    if (m_pDocument->GetPageCount() <= iPage ||
701        m_pDocument->IsPageLoaded(iPage)) {
702      m_docStatus = PDF_DATAAVAIL_DONE;
703      return true;
704    }
705    if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
706      m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
707      return true;
708    }
709    int32_t iCount = -1;
710    return CheckPageNode(m_PageNode, iPage, iCount, 0);
711  }
712  
CheckPageCount()713  bool CPDF_DataAvail::CheckPageCount() {
714    bool bExists = false;
715    std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
716    if (!bExists) {
717      m_docStatus = PDF_DATAAVAIL_ERROR;
718      return false;
719    }
720    if (!pPages)
721      return false;
722  
723    CPDF_Dictionary* pPagesDict = pPages->GetDict();
724    if (!pPagesDict) {
725      m_docStatus = PDF_DATAAVAIL_ERROR;
726      return false;
727    }
728    if (!pPagesDict->KeyExist("Kids"))
729      return true;
730  
731    return pPagesDict->GetIntegerFor("Count") > 0;
732  }
733  
LoadDocPages()734  bool CPDF_DataAvail::LoadDocPages() {
735    if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
736      return false;
737  
738    if (CheckPageCount()) {
739      m_docStatus = PDF_DATAAVAIL_PAGE;
740      return true;
741    }
742  
743    m_bTotalLoadPageTree = true;
744    return false;
745  }
746  
LoadPages()747  bool CPDF_DataAvail::LoadPages() {
748    while (!m_bPagesTreeLoad) {
749      if (!CheckPageStatus())
750        return false;
751    }
752  
753    if (m_bPagesLoad)
754      return true;
755  
756    m_pDocument->LoadPages();
757    return false;
758  }
759  
CheckLinearizedData()760  CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
761    if (m_bLinearedDataOK)
762      return DataAvailable;
763    ASSERT(m_pLinearized);
764    if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
765        !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
766      return DataError;
767    }
768  
769    if (!m_bMainXRefLoadTried) {
770      const FX_SAFE_FILESIZE main_xref_offset =
771          m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
772      if (!main_xref_offset.IsValid())
773        return DataError;
774  
775      if (main_xref_offset.ValueOrDie() == 0)
776        return DataAvailable;
777  
778      FX_SAFE_SIZE_T data_size = m_dwFileLen;
779      data_size -= main_xref_offset.ValueOrDie();
780      if (!data_size.IsValid())
781        return DataError;
782  
783      if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
784              main_xref_offset.ValueOrDie(), data_size.ValueOrDie()))
785        return DataNotAvailable;
786  
787      CPDF_Parser::Error eRet =
788          m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
789      m_bMainXRefLoadTried = true;
790      if (eRet != CPDF_Parser::SUCCESS)
791        return DataError;
792  
793      if (!PreparePageItem())
794        return DataNotAvailable;
795  
796      m_bMainXRefLoadedOK = true;
797      m_bLinearedDataOK = true;
798    }
799  
800    return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
801  }
802  
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)803  CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
804      uint32_t dwPage,
805      DownloadHints* pHints) {
806    if (!m_pDocument)
807      return DataError;
808  
809    const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
810    if (!safePage.IsValid())
811      return DataError;
812  
813    if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
814      // This is XFA page.
815      return DataAvailable;
816    }
817  
818    if (IsFirstCheck(dwPage)) {
819      m_bCurPageDictLoadOK = false;
820    }
821  
822    if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
823      return DataAvailable;
824  
825    const HintsScope hints_scope(GetValidator().Get(), pHints);
826  
827    if (m_pLinearized) {
828      if (dwPage == m_pLinearized->GetFirstPageNo()) {
829        CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
830        if (!pPageDict)
831          return DataError;
832  
833        auto page_num_obj = std::make_pair(
834            dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
835                        GetValidator().Get(), m_pDocument, pPageDict));
836  
837        CPDF_PageObjectAvail* page_obj_avail =
838            m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
839        // TODO(art-snake): Check resources.
840        return page_obj_avail->CheckAvail();
841      }
842  
843      DocAvailStatus nResult = CheckLinearizedData();
844      if (nResult != DataAvailable)
845        return nResult;
846  
847      if (m_pHintTables) {
848        nResult = m_pHintTables->CheckPage(dwPage);
849        if (nResult != DataAvailable)
850          return nResult;
851        if (GetPage(dwPage)) {
852          m_pagesLoadState.insert(dwPage);
853          return DataAvailable;
854        }
855      }
856  
857      if (!m_bMainXRefLoadedOK) {
858        if (!LoadAllFile())
859          return DataNotAvailable;
860        m_pDocument->GetParser()->RebuildCrossRef();
861        ResetFirstCheck(dwPage);
862        return DataAvailable;
863      }
864      if (m_bTotalLoadPageTree) {
865        if (!LoadPages())
866          return DataNotAvailable;
867      } else {
868        if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
869          return DataNotAvailable;
870      }
871    } else {
872      if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
873        return DataNotAvailable;
874      }
875    }
876  
877    if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
878      return DataNotAvailable;
879  
880    CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
881    if (!pPageDict)
882      return DataError;
883  
884    {
885      auto page_num_obj = std::make_pair(
886          dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
887                      GetValidator().Get(), m_pDocument, pPageDict));
888      CPDF_PageObjectAvail* page_obj_avail =
889          m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
890      const DocAvailStatus status = page_obj_avail->CheckAvail();
891      if (status != DocAvailStatus::DataAvailable)
892        return status;
893    }
894  
895    const DocAvailStatus resources_status = CheckResources(pPageDict);
896    if (resources_status != DocAvailStatus::DataAvailable)
897      return resources_status;
898  
899    m_bCurPageDictLoadOK = false;
900    ResetFirstCheck(dwPage);
901    m_pagesLoadState.insert(dwPage);
902    return DataAvailable;
903  }
904  
CheckResources(const CPDF_Dictionary * page)905  CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
906      const CPDF_Dictionary* page) {
907    ASSERT(page);
908    const CPDF_ReadValidator::Session read_session(GetValidator().Get());
909    const CPDF_Object* resources = GetResourceObject(page);
910    if (GetValidator()->has_read_problems())
911      return DocAvailStatus::DataNotAvailable;
912  
913    if (!resources)
914      return DocAvailStatus::DataAvailable;
915  
916    CPDF_PageObjectAvail* resource_avail =
917        m_PagesResourcesAvail
918            .insert(std::make_pair(
919                resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
920                               GetValidator().Get(), m_pDocument, resources)))
921            .first->second.get();
922    return resource_avail->CheckAvail();
923  }
924  
GetFileRead() const925  RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
926    return m_pFileRead;
927  }
928  
GetValidator() const929  RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
930    return m_pFileRead;
931  }
932  
GetSyntaxParser() const933  CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
934    return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
935                       : m_parser.m_pSyntax.get();
936  }
937  
GetPageCount() const938  int CPDF_DataAvail::GetPageCount() const {
939    if (m_pLinearized)
940      return m_pLinearized->GetPageCount();
941    return m_pDocument ? m_pDocument->GetPageCount() : 0;
942  }
943  
GetPage(int index)944  CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
945    if (!m_pDocument || index < 0 || index >= GetPageCount())
946      return nullptr;
947    CPDF_Dictionary* page = m_pDocument->GetPage(index);
948    if (page)
949      return page;
950    if (!m_pLinearized || !m_pHintTables)
951      return nullptr;
952  
953    if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
954      return nullptr;
955    FX_FILESIZE szPageStartPos = 0;
956    FX_FILESIZE szPageLength = 0;
957    uint32_t dwObjNum = 0;
958    const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
959                                                       &szPageLength, &dwObjNum);
960    if (!bPagePosGot || !dwObjNum)
961      return nullptr;
962    // We should say to the document, which object is the page.
963    m_pDocument->SetPageObjNum(index, dwObjNum);
964    // Page object already can be parsed in document.
965    if (!m_pDocument->GetIndirectObject(dwObjNum)) {
966      m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
967          dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
968    }
969    if (!ValidatePage(index))
970      return nullptr;
971    return m_pDocument->GetPage(index);
972  }
973  
IsFormAvail(DownloadHints * pHints)974  CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
975      DownloadHints* pHints) {
976    const HintsScope hints_scope(GetValidator().Get(), pHints);
977    return CheckAcroForm();
978  }
979  
CheckAcroForm()980  CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
981    if (!m_pDocument)
982      return FormAvailable;
983  
984    if (m_pLinearized) {
985      DocAvailStatus nDocStatus = CheckLinearizedData();
986      if (nDocStatus == DataError)
987        return FormError;
988      if (nDocStatus == DataNotAvailable)
989        return FormNotAvailable;
990    }
991  
992    if (!m_pFormAvail) {
993      const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
994      if (!pRoot)
995        return FormAvailable;
996  
997      CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
998      if (!pAcroForm)
999        return FormNotExist;
1000  
1001      m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
1002          GetValidator().Get(), m_pDocument, pAcroForm);
1003    }
1004    switch (m_pFormAvail->CheckAvail()) {
1005      case DocAvailStatus::DataError:
1006        return DocFormStatus::FormError;
1007      case DocAvailStatus::DataNotAvailable:
1008        return DocFormStatus::FormNotAvailable;
1009      case DocAvailStatus::DataAvailable:
1010        return DocFormStatus::FormAvailable;
1011      default:
1012        NOTREACHED();
1013    }
1014    return DocFormStatus::FormError;
1015  }
1016  
ValidatePage(uint32_t dwPage)1017  bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1018    FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1019    CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1020    if (!pPageDict)
1021      return false;
1022    CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict);
1023    return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
1024  }
1025  
1026  std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(const char * password)1027  CPDF_DataAvail::ParseDocument(const char* password) {
1028    if (m_pDocument) {
1029      // We already returned parsed document.
1030      return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1031    }
1032    auto parser = pdfium::MakeUnique<CPDF_Parser>();
1033    parser->SetPassword(password);
1034    auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser));
1035  
1036    CPDF_ReadValidator::Session read_session(GetValidator().Get());
1037    CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse(
1038        GetFileRead(), document.get());
1039  
1040    // Additional check, that all ok.
1041    if (GetValidator()->has_read_problems()) {
1042      NOTREACHED();
1043      return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1044    }
1045  
1046    if (error != CPDF_Parser::SUCCESS)
1047      return std::make_pair(error, nullptr);
1048  
1049    m_pDocument = document.get();
1050    return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1051  }
1052  
PageNode()1053  CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1054  
~PageNode()1055  CPDF_DataAvail::PageNode::~PageNode() {}
1056