1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 9 10 #include <map> 11 #include <memory> 12 #include <set> 13 #include <utility> 14 #include <vector> 15 16 #include "core/fpdfapi/parser/cpdf_document.h" 17 #include "core/fpdfapi/parser/cpdf_parser.h" 18 #include "core/fxcrt/unowned_ptr.h" 19 20 class CPDF_CrossRefAvail; 21 class CPDF_Dictionary; 22 class CPDF_HintTables; 23 class CPDF_IndirectObjectHolder; 24 class CPDF_LinearizedHeader; 25 class CPDF_PageObjectAvail; 26 class CPDF_ReadValidator; 27 class CPDF_SyntaxParser; 28 29 enum PDF_DATAAVAIL_STATUS { 30 PDF_DATAAVAIL_HEADER = 0, 31 PDF_DATAAVAIL_FIRSTPAGE, 32 PDF_DATAAVAIL_HINTTABLE, 33 PDF_DATAAVAIL_LOADALLCROSSREF, 34 PDF_DATAAVAIL_ROOT, 35 PDF_DATAAVAIL_INFO, 36 PDF_DATAAVAIL_PAGETREE, 37 PDF_DATAAVAIL_PAGE, 38 PDF_DATAAVAIL_PAGE_LATERLOAD, 39 PDF_DATAAVAIL_RESOURCES, 40 PDF_DATAAVAIL_DONE, 41 PDF_DATAAVAIL_ERROR, 42 PDF_DATAAVAIL_LOADALLFILE, 43 }; 44 45 enum PDF_PAGENODE_TYPE { 46 PDF_PAGENODE_UNKNOWN = 0, 47 PDF_PAGENODE_PAGE, 48 PDF_PAGENODE_PAGES, 49 PDF_PAGENODE_ARRAY, 50 }; 51 52 class CPDF_DataAvail final : public Observable::ObserverIface { 53 public: 54 // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot 55 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 56 // to make sure the two sets of values match. 57 enum DocAvailStatus { 58 DataError = -1, // PDF_DATA_ERROR 59 DataNotAvailable = 0, // PDF_DATA_NOTAVAIL 60 DataAvailable = 1, // PDF_DATA_AVAIL 61 }; 62 63 // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot 64 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 65 // to make sure the two sets of values match. 66 enum DocLinearizationStatus { 67 LinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN 68 NotLinearized = 0, // PDF_NOT_LINEARIZED 69 Linearized = 1, // PDF_LINEARIZED 70 }; 71 72 // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot 73 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 74 // to make sure the two sets of values match. 75 enum DocFormStatus { 76 FormError = -1, // PDF_FORM_ERROR 77 FormNotAvailable = 0, // PDF_FORM_NOTAVAIL 78 FormAvailable = 1, // PDF_FORM_AVAIL 79 FormNotExist = 2, // PDF_FORM_NOTEXIST 80 }; 81 82 class FileAvail { 83 public: 84 virtual ~FileAvail(); 85 virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0; 86 }; 87 88 class DownloadHints { 89 public: 90 virtual ~DownloadHints(); 91 virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0; 92 }; 93 94 CPDF_DataAvail(FileAvail* pFileAvail, 95 const RetainPtr<IFX_SeekableReadStream>& pFileRead, 96 bool bSupportHintTable); 97 ~CPDF_DataAvail() override; 98 99 // CPDF_Document::Observer: 100 void OnObservableDestroyed() override; 101 102 DocAvailStatus IsDocAvail(DownloadHints* pHints); 103 DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); 104 DocFormStatus IsFormAvail(DownloadHints* pHints); 105 DocLinearizationStatus IsLinearizedPDF(); 106 int GetPageCount() const; 107 CPDF_Dictionary* GetPageDictionary(int index) const; 108 RetainPtr<CPDF_ReadValidator> GetValidator() const; 109 110 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument( 111 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData, 112 std::unique_ptr<CPDF_Document::PageDataIface> pPageData, 113 const char* password); 114 GetHintTables()115 const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); } 116 117 private: 118 class PageNode { 119 public: 120 PageNode(); 121 ~PageNode(); 122 123 PDF_PAGENODE_TYPE m_type; 124 uint32_t m_dwPageNo; 125 std::vector<std::unique_ptr<PageNode>> m_ChildNodes; 126 }; 127 128 static const int kMaxPageRecursionDepth = 1024; 129 130 bool CheckDocStatus(); 131 bool CheckHeader(); 132 bool CheckFirstPage(); 133 bool CheckHintTables(); 134 bool CheckRoot(); 135 bool CheckInfo(); 136 bool CheckPages(); 137 bool CheckPage(); 138 DocAvailStatus CheckResources(CPDF_Dictionary* page); 139 DocFormStatus CheckAcroForm(); 140 bool CheckPageStatus(); 141 142 DocAvailStatus CheckHeaderAndLinearized(); 143 RetainPtr<CPDF_Object> ParseIndirectObjectAt( 144 FX_FILESIZE pos, 145 uint32_t objnum, 146 CPDF_IndirectObjectHolder* pObjList) const; 147 RetainPtr<CPDF_Object> GetObject(uint32_t objnum, bool* pExistInFile); 148 bool GetPageKids(CPDF_Object* pPages); 149 bool PreparePageItem(); 150 bool LoadPages(); 151 bool CheckAndLoadAllXref(); 152 bool LoadAllFile(); 153 DocAvailStatus CheckLinearizedData(); 154 155 bool CheckPage(uint32_t dwPage); 156 bool LoadDocPages(); 157 bool LoadDocPage(uint32_t dwPage); 158 bool CheckPageNode(const PageNode& pageNode, 159 int32_t iPage, 160 int32_t& iCount, 161 int level); 162 bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode); 163 bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode); 164 bool CheckPageCount(); 165 bool IsFirstCheck(uint32_t dwPage); 166 void ResetFirstCheck(uint32_t dwPage); 167 bool ValidatePage(uint32_t dwPage) const; 168 CPDF_SyntaxParser* GetSyntaxParser() const; 169 170 RetainPtr<CPDF_ReadValidator> m_pFileRead; 171 CPDF_Parser m_parser; 172 RetainPtr<CPDF_Dictionary> m_pRoot; 173 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 174 bool m_bDocAvail = false; 175 std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail; 176 PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER; 177 const FX_FILESIZE m_dwFileLen; 178 UnownedPtr<CPDF_Document> m_pDocument; 179 std::vector<uint32_t> m_PageObjList; 180 uint32_t m_PagesObjNum = 0; 181 bool m_bLinearedDataOK = false; 182 bool m_bMainXRefLoadTried = false; 183 bool m_bMainXRefLoadedOK = false; 184 bool m_bPagesTreeLoad = false; 185 bool m_bPagesLoad = false; 186 std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail; 187 std::vector<RetainPtr<CPDF_Object>> m_PagesArray; 188 bool m_bTotalLoadPageTree = false; 189 bool m_bCurPageDictLoadOK = false; 190 PageNode m_PageNode; 191 std::set<uint32_t> m_pageMapCheckState; 192 std::set<uint32_t> m_pagesLoadState; 193 std::unique_ptr<CPDF_HintTables> m_pHintTables; 194 const bool m_bSupportHintTable; 195 std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail; 196 std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>> 197 m_PagesResourcesAvail; 198 bool m_bHeaderAvail = false; 199 }; 200 201 #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 202