1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 9 10 #include <map> 11 #include <memory> 12 #include <set> 13 #include <utility> 14 #include <vector> 15 16 #include "core/fpdfapi/parser/cpdf_parser.h" 17 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 18 #include "core/fxcrt/unowned_ptr.h" 19 20 class CPDF_CrossRefAvail; 21 class CPDF_Dictionary; 22 class CPDF_HintTables; 23 class CPDF_IndirectObjectHolder; 24 class CPDF_LinearizedHeader; 25 class CPDF_PageObjectAvail; 26 class CPDF_Parser; 27 class CPDF_ReadValidator; 28 29 enum PDF_DATAAVAIL_STATUS { 30 PDF_DATAAVAIL_HEADER = 0, 31 PDF_DATAAVAIL_FIRSTPAGE, 32 PDF_DATAAVAIL_HINTTABLE, 33 PDF_DATAAVAIL_LOADALLCROSSREF, 34 PDF_DATAAVAIL_ROOT, 35 PDF_DATAAVAIL_INFO, 36 PDF_DATAAVAIL_PAGETREE, 37 PDF_DATAAVAIL_PAGE, 38 PDF_DATAAVAIL_PAGE_LATERLOAD, 39 PDF_DATAAVAIL_RESOURCES, 40 PDF_DATAAVAIL_DONE, 41 PDF_DATAAVAIL_ERROR, 42 PDF_DATAAVAIL_LOADALLFILE, 43 }; 44 45 enum PDF_PAGENODE_TYPE { 46 PDF_PAGENODE_UNKNOWN = 0, 47 PDF_PAGENODE_PAGE, 48 PDF_PAGENODE_PAGES, 49 PDF_PAGENODE_ARRAY, 50 }; 51 52 class CPDF_DataAvail final { 53 public: 54 // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot 55 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 56 // to make sure the two sets of values match. 57 enum DocAvailStatus { 58 DataError = -1, // PDF_DATA_ERROR 59 DataNotAvailable = 0, // PDF_DATA_NOTAVAIL 60 DataAvailable = 1, // PDF_DATA_AVAIL 61 }; 62 63 // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot 64 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 65 // to make sure the two sets of values match. 66 enum DocLinearizationStatus { 67 LinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN 68 NotLinearized = 0, // PDF_NOT_LINEARIZED 69 Linearized = 1, // PDF_LINEARIZED 70 }; 71 72 // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot 73 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 74 // to make sure the two sets of values match. 75 enum DocFormStatus { 76 FormError = -1, // PDF_FORM_ERROR 77 FormNotAvailable = 0, // PDF_FORM_NOTAVAIL 78 FormAvailable = 1, // PDF_FORM_AVAIL 79 FormNotExist = 2, // PDF_FORM_NOTEXIST 80 }; 81 82 class FileAvail { 83 public: 84 virtual ~FileAvail(); 85 virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0; 86 }; 87 88 class DownloadHints { 89 public: 90 virtual ~DownloadHints(); 91 virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0; 92 }; 93 94 CPDF_DataAvail(FileAvail* pFileAvail, 95 const RetainPtr<IFX_SeekableReadStream>& pFileRead, 96 bool bSupportHintTable); 97 ~CPDF_DataAvail(); 98 99 DocAvailStatus IsDocAvail(DownloadHints* pHints); 100 DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); 101 DocFormStatus IsFormAvail(DownloadHints* pHints); 102 DocLinearizationStatus IsLinearizedPDF(); 103 RetainPtr<IFX_SeekableReadStream> GetFileRead() const; 104 int GetPageCount() const; 105 CPDF_Dictionary* GetPage(int index); 106 RetainPtr<CPDF_ReadValidator> GetValidator() const; 107 108 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument( 109 const char* password); 110 GetHintTables()111 const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); } 112 113 protected: 114 class PageNode { 115 public: 116 PageNode(); 117 ~PageNode(); 118 119 PDF_PAGENODE_TYPE m_type; 120 uint32_t m_dwPageNo; 121 std::vector<std::unique_ptr<PageNode>> m_ChildNodes; 122 }; 123 124 static const int kMaxPageRecursionDepth = 1024; 125 126 bool CheckDocStatus(); 127 bool CheckHeader(); 128 bool CheckFirstPage(); 129 bool CheckHintTables(); 130 bool CheckRoot(); 131 bool CheckInfo(); 132 bool CheckPages(); 133 bool CheckPage(); 134 DocAvailStatus CheckResources(const CPDF_Dictionary* page); 135 DocFormStatus CheckAcroForm(); 136 bool CheckPageStatus(); 137 138 DocAvailStatus CheckHeaderAndLinearized(); 139 std::unique_ptr<CPDF_Object> ParseIndirectObjectAt( 140 FX_FILESIZE pos, 141 uint32_t objnum, 142 CPDF_IndirectObjectHolder* pObjList = nullptr); 143 std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum, 144 bool* pExistInFile); 145 bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); 146 bool PreparePageItem(); 147 bool LoadPages(); 148 bool CheckAndLoadAllXref(); 149 bool LoadAllFile(); 150 DocAvailStatus CheckLinearizedData(); 151 152 bool CheckPage(uint32_t dwPage); 153 bool LoadDocPages(); 154 bool LoadDocPage(uint32_t dwPage); 155 bool CheckPageNode(const PageNode& pageNode, 156 int32_t iPage, 157 int32_t& iCount, 158 int level); 159 bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode); 160 bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode); 161 bool CheckPageCount(); 162 bool IsFirstCheck(uint32_t dwPage); 163 void ResetFirstCheck(uint32_t dwPage); 164 bool ValidatePage(uint32_t dwPage); 165 CPDF_SyntaxParser* GetSyntaxParser() const; 166 167 FileAvail* const m_pFileAvail; 168 RetainPtr<CPDF_ReadValidator> m_pFileRead; 169 CPDF_Parser m_parser; 170 std::unique_ptr<CPDF_Object> m_pRoot; 171 uint32_t m_dwRootObjNum = 0; 172 uint32_t m_dwInfoObjNum = 0; 173 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 174 bool m_bDocAvail = false; 175 std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail; 176 PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER; 177 const FX_FILESIZE m_dwFileLen; 178 CPDF_Document* m_pDocument = nullptr; 179 std::vector<uint32_t> m_PageObjList; 180 uint32_t m_PagesObjNum = 0; 181 bool m_bLinearedDataOK = false; 182 bool m_bMainXRefLoadTried = false; 183 bool m_bMainXRefLoadedOK = false; 184 bool m_bPagesTreeLoad = false; 185 bool m_bPagesLoad = false; 186 CPDF_Parser* m_pCurrentParser = nullptr; 187 std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail; 188 std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray; 189 uint32_t m_dwEncryptObjNum = 0; 190 bool m_bTotalLoadPageTree = false; 191 bool m_bCurPageDictLoadOK = false; 192 PageNode m_PageNode; 193 std::set<uint32_t> m_pageMapCheckState; 194 std::set<uint32_t> m_pagesLoadState; 195 std::set<uint32_t> m_SeenPrevPositions; 196 std::unique_ptr<CPDF_HintTables> m_pHintTables; 197 const bool m_bSupportHintTable; 198 std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail; 199 std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>> 200 m_PagesResourcesAvail; 201 bool m_bHeaderAvail = false; 202 }; 203 204 #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 205