1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 9 10 #include <memory> 11 #include <set> 12 #include <vector> 13 14 #include "core/fpdfapi/parser/cpdf_parser.h" 15 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 16 #include "core/fxcrt/fx_basic.h" 17 18 class CPDF_Dictionary; 19 class CPDF_HintTables; 20 class CPDF_IndirectObjectHolder; 21 class CPDF_LinearizedHeader; 22 class CPDF_Parser; 23 24 enum PDF_DATAAVAIL_STATUS { 25 PDF_DATAAVAIL_HEADER = 0, 26 PDF_DATAAVAIL_FIRSTPAGE, 27 PDF_DATAAVAIL_HINTTABLE, 28 PDF_DATAAVAIL_END, 29 PDF_DATAAVAIL_CROSSREF, 30 PDF_DATAAVAIL_CROSSREF_ITEM, 31 PDF_DATAAVAIL_CROSSREF_STREAM, 32 PDF_DATAAVAIL_TRAILER, 33 PDF_DATAAVAIL_LOADALLCROSSREF, 34 PDF_DATAAVAIL_ROOT, 35 PDF_DATAAVAIL_INFO, 36 PDF_DATAAVAIL_ACROFORM, 37 PDF_DATAAVAIL_ACROFORM_SUBOBJECT, 38 PDF_DATAAVAIL_PAGETREE, 39 PDF_DATAAVAIL_PAGE, 40 PDF_DATAAVAIL_PAGE_LATERLOAD, 41 PDF_DATAAVAIL_RESOURCES, 42 PDF_DATAAVAIL_DONE, 43 PDF_DATAAVAIL_ERROR, 44 PDF_DATAAVAIL_LOADALLFILE, 45 PDF_DATAAVAIL_TRAILER_APPEND 46 }; 47 48 enum PDF_PAGENODE_TYPE { 49 PDF_PAGENODE_UNKNOWN = 0, 50 PDF_PAGENODE_PAGE, 51 PDF_PAGENODE_PAGES, 52 PDF_PAGENODE_ARRAY, 53 }; 54 55 class CPDF_DataAvail final { 56 public: 57 // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot 58 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 59 // to make sure the two sets of values match. 60 enum DocAvailStatus { 61 DataError = -1, // PDF_DATA_ERROR 62 DataNotAvailable = 0, // PDF_DATA_NOTAVAIL 63 DataAvailable = 1, // PDF_DATA_AVAIL 64 }; 65 66 // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot 67 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 68 // to make sure the two sets of values match. 69 enum DocLinearizationStatus { 70 LinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN 71 NotLinearized = 0, // PDF_NOT_LINEARIZED 72 Linearized = 1, // PDF_LINEARIZED 73 }; 74 75 // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot 76 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 77 // to make sure the two sets of values match. 78 enum DocFormStatus { 79 FormError = -1, // PDF_FORM_ERROR 80 FormNotAvailable = 0, // PDF_FORM_NOTAVAIL 81 FormAvailable = 1, // PDF_FORM_AVAIL 82 FormNotExist = 2, // PDF_FORM_NOTEXIST 83 }; 84 85 class FileAvail { 86 public: 87 virtual ~FileAvail(); 88 virtual bool IsDataAvail(FX_FILESIZE offset, uint32_t size) = 0; 89 }; 90 91 class DownloadHints { 92 public: 93 virtual ~DownloadHints(); 94 virtual void AddSegment(FX_FILESIZE offset, uint32_t size) = 0; 95 }; 96 97 CPDF_DataAvail(FileAvail* pFileAvail, 98 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead, 99 bool bSupportHintTable); 100 ~CPDF_DataAvail(); 101 102 bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints); 103 DocAvailStatus IsDocAvail(DownloadHints* pHints); 104 void SetDocument(CPDF_Document* pDoc); 105 DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); 106 DocFormStatus IsFormAvail(DownloadHints* pHints); 107 DocLinearizationStatus IsLinearizedPDF(); 108 bool IsLinearized(); 109 void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, uint32_t* pSize); GetFileRead()110 CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const { 111 return m_pFileRead; 112 } 113 int GetPageCount() const; 114 CPDF_Dictionary* GetPage(int index); 115 116 protected: 117 class PageNode { 118 public: 119 PageNode(); 120 ~PageNode(); 121 122 PDF_PAGENODE_TYPE m_type; 123 uint32_t m_dwPageNo; 124 std::vector<std::unique_ptr<PageNode>> m_ChildNodes; 125 }; 126 127 static const int kMaxDataAvailRecursionDepth = 64; 128 static int s_CurrentDataAvailRecursionDepth; 129 static const int kMaxPageRecursionDepth = 1024; 130 131 uint32_t GetObjectSize(uint32_t objnum, FX_FILESIZE& offset); 132 bool AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array, 133 bool bParsePage, 134 DownloadHints* pHints, 135 std::vector<CPDF_Object*>& ret_array); 136 bool CheckDocStatus(DownloadHints* pHints); 137 bool CheckHeader(DownloadHints* pHints); 138 bool CheckFirstPage(DownloadHints* pHints); 139 bool CheckHintTables(DownloadHints* pHints); 140 bool CheckEnd(DownloadHints* pHints); 141 bool CheckCrossRef(DownloadHints* pHints); 142 bool CheckCrossRefItem(DownloadHints* pHints); 143 bool CheckTrailer(DownloadHints* pHints); 144 bool CheckRoot(DownloadHints* pHints); 145 bool CheckInfo(DownloadHints* pHints); 146 bool CheckPages(DownloadHints* pHints); 147 bool CheckPage(DownloadHints* pHints); 148 bool CheckResources(DownloadHints* pHints); 149 bool CheckAnnots(DownloadHints* pHints); 150 bool CheckAcroForm(DownloadHints* pHints); 151 bool CheckAcroFormSubObject(DownloadHints* pHints); 152 bool CheckTrailerAppend(DownloadHints* pHints); 153 bool CheckPageStatus(DownloadHints* pHints); 154 bool CheckAllCrossRefStream(DownloadHints* pHints); 155 156 int32_t CheckCrossRefStream(DownloadHints* pHints, FX_FILESIZE& xref_offset); 157 bool IsLinearizedFile(uint8_t* pData, uint32_t dwLen); 158 void SetStartOffset(FX_FILESIZE dwOffset); 159 bool GetNextToken(CFX_ByteString& token); 160 bool GetNextChar(uint8_t& ch); 161 std::unique_ptr<CPDF_Object> ParseIndirectObjectAt( 162 FX_FILESIZE pos, 163 uint32_t objnum, 164 CPDF_IndirectObjectHolder* pObjList = nullptr); 165 std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum, 166 DownloadHints* pHints, 167 bool* pExistInFile); 168 bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); 169 bool PreparePageItem(); 170 bool LoadPages(DownloadHints* pHints); 171 bool LoadAllXref(DownloadHints* pHints); 172 bool LoadAllFile(DownloadHints* pHints); 173 DocAvailStatus CheckLinearizedData(DownloadHints* pHints); 174 bool CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints); 175 176 DocAvailStatus CheckLinearizedFirstPage(uint32_t dwPage, 177 DownloadHints* pHints); 178 bool HaveResourceAncestor(CPDF_Dictionary* pDict); 179 bool CheckPage(uint32_t dwPage, DownloadHints* pHints); 180 bool LoadDocPages(DownloadHints* pHints); 181 bool LoadDocPage(uint32_t dwPage, DownloadHints* pHints); 182 bool CheckPageNode(const PageNode& pageNode, 183 int32_t iPage, 184 int32_t& iCount, 185 DownloadHints* pHints, 186 int level); 187 bool CheckUnknownPageNode(uint32_t dwPageNo, 188 PageNode* pPageNode, 189 DownloadHints* pHints); 190 bool CheckArrayPageNode(uint32_t dwPageNo, 191 PageNode* pPageNode, 192 DownloadHints* pHints); 193 bool CheckPageCount(DownloadHints* pHints); 194 bool IsFirstCheck(uint32_t dwPage); 195 void ResetFirstCheck(uint32_t dwPage); 196 bool ValidatePage(uint32_t dwPage); 197 bool ValidateForm(); 198 199 FileAvail* const m_pFileAvail; 200 CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead; 201 CPDF_Parser m_parser; 202 CPDF_SyntaxParser m_syntaxParser; 203 std::unique_ptr<CPDF_Object> m_pRoot; 204 uint32_t m_dwRootObjNum; 205 uint32_t m_dwInfoObjNum; 206 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 207 CPDF_Object* m_pTrailer; 208 bool m_bDocAvail; 209 FX_FILESIZE m_dwHeaderOffset; 210 FX_FILESIZE m_dwLastXRefOffset; 211 FX_FILESIZE m_dwXRefOffset; 212 FX_FILESIZE m_dwTrailerOffset; 213 FX_FILESIZE m_dwCurrentOffset; 214 PDF_DATAAVAIL_STATUS m_docStatus; 215 FX_FILESIZE m_dwFileLen; 216 CPDF_Document* m_pDocument; 217 std::set<uint32_t> m_ObjectSet; 218 std::vector<CPDF_Object*> m_objs_array; 219 FX_FILESIZE m_Pos; 220 FX_FILESIZE m_bufferOffset; 221 uint32_t m_bufferSize; 222 CFX_ByteString m_WordBuf; 223 uint8_t m_bufferData[512]; 224 std::vector<uint32_t> m_XRefStreamList; 225 std::vector<uint32_t> m_PageObjList; 226 uint32_t m_PagesObjNum; 227 bool m_bLinearedDataOK; 228 bool m_bMainXRefLoadTried; 229 bool m_bMainXRefLoadedOK; 230 bool m_bPagesTreeLoad; 231 bool m_bPagesLoad; 232 CPDF_Parser* m_pCurrentParser; 233 FX_FILESIZE m_dwCurrentXRefSteam; 234 bool m_bAnnotsLoad; 235 bool m_bHaveAcroForm; 236 uint32_t m_dwAcroFormObjNum; 237 bool m_bAcroFormLoad; 238 CPDF_Object* m_pAcroForm; 239 std::vector<CPDF_Object*> m_arrayAcroforms; 240 CPDF_Dictionary* m_pPageDict; 241 CPDF_Object* m_pPageResource; 242 bool m_bNeedDownLoadResource; 243 bool m_bPageLoadedOK; 244 bool m_bLinearizedFormParamLoad; 245 std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray; 246 uint32_t m_dwEncryptObjNum; 247 FX_FILESIZE m_dwPrevXRefOffset; 248 bool m_bTotalLoadPageTree; 249 bool m_bCurPageDictLoadOK; 250 PageNode m_PageNode; 251 std::set<uint32_t> m_pageMapCheckState; 252 std::set<uint32_t> m_pagesLoadState; 253 std::unique_ptr<CPDF_HintTables> m_pHintTables; 254 bool m_bSupportHintTable; 255 }; 256 257 #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 258