// Copyright 2016 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ #include #include #include #include #include #include #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fxcrt/retain_ptr.h" #include "core/fxcrt/unowned_ptr.h" class CPDF_CrossRefAvail; class CPDF_Dictionary; class CPDF_HintTables; class CPDF_IndirectObjectHolder; class CPDF_LinearizedHeader; class CPDF_PageObjectAvail; class CPDF_ReadValidator; class CPDF_SyntaxParser; class CPDF_DataAvail final : public Observable::ObserverIface { public: // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocAvailStatus { kDataError = -1, // PDF_DATA_ERROR kDataNotAvailable = 0, // PDF_DATA_NOTAVAIL kDataAvailable = 1, // PDF_DATA_AVAIL }; // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocLinearizationStatus { kLinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN kNotLinearized = 0, // PDF_NOT_LINEARIZED kLinearized = 1, // PDF_LINEARIZED }; // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts // to make sure the two sets of values match. enum DocFormStatus { kFormError = -1, // PDF_FORM_ERROR kFormNotAvailable = 0, // PDF_FORM_NOTAVAIL kFormAvailable = 1, // PDF_FORM_AVAIL kFormNotExist = 2, // PDF_FORM_NOTEXIST }; class FileAvail { public: virtual ~FileAvail(); virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0; }; class DownloadHints { public: virtual ~DownloadHints(); virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0; }; CPDF_DataAvail(FileAvail* pFileAvail, RetainPtr pFileRead); ~CPDF_DataAvail() override; // Observable::ObserverIface: void OnObservableDestroyed() override; DocAvailStatus IsDocAvail(DownloadHints* pHints); DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); DocFormStatus IsFormAvail(DownloadHints* pHints); DocLinearizationStatus IsLinearizedPDF(); int GetPageCount() const; RetainPtr GetPageDictionary(int index) const; RetainPtr GetValidator() const; std::pair> ParseDocument( std::unique_ptr pRenderData, std::unique_ptr pPageData, const ByteString& password); const CPDF_HintTables* GetHintTablesForTest() const { return m_pHintTables.get(); } private: enum class InternalStatus : uint8_t { kHeader = 0, kFirstPage, kHintTable, kLoadAllCrossRef, kRoot, kInfo, kPageTree, kPage, kPageLaterLoad, kResources, kDone, kError, kLoadAllFile, }; class PageNode { public: enum class Type { kUnknown = 0, kPage, kPages, kArray }; PageNode(); ~PageNode(); Type m_type = Type::kUnknown; uint32_t m_dwPageNo = 0; std::vector> m_ChildNodes; }; static constexpr int kMaxPageRecursionDepth = 1024; bool CheckDocStatus(); bool CheckHeader(); bool CheckFirstPage(); bool CheckHintTables(); bool CheckRoot(); bool CheckInfo(); bool CheckPages(); bool CheckPage(); DocAvailStatus CheckResources(RetainPtr page); DocFormStatus CheckAcroForm(); bool CheckPageStatus(); DocAvailStatus CheckHeaderAndLinearized(); RetainPtr ParseIndirectObjectAt( FX_FILESIZE pos, uint32_t objnum, CPDF_IndirectObjectHolder* pObjList) const; RetainPtr GetObject(uint32_t objnum, bool* pExistInFile); bool GetPageKids(CPDF_Object* pPages); bool PreparePageItem(); bool LoadPages(); bool CheckAndLoadAllXref(); bool LoadAllFile(); DocAvailStatus CheckLinearizedData(); bool CheckPage(uint32_t dwPage); bool LoadDocPages(); bool LoadDocPage(uint32_t dwPage); bool CheckPageNode(const PageNode& pageNode, int32_t iPage, int32_t& iCount, int level); bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode); bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode); bool CheckPageCount(); bool IsFirstCheck(uint32_t dwPage); void ResetFirstCheck(uint32_t dwPage); bool ValidatePage(uint32_t dwPage) const; CPDF_SyntaxParser* GetSyntaxParser() const; RetainPtr m_pFileRead; CPDF_Parser m_parser; RetainPtr m_pRoot; std::unique_ptr m_pLinearized; bool m_bDocAvail = false; InternalStatus m_internalStatus = InternalStatus::kHeader; std::unique_ptr m_pCrossRefAvail; const FX_FILESIZE m_dwFileLen; UnownedPtr m_pDocument; std::vector m_PageObjList; std::set m_SeenPageObjList; uint32_t m_PagesObjNum = 0; bool m_bLinearedDataOK = false; bool m_bMainXRefLoadTried = false; bool m_bMainXRefLoadedOK = false; bool m_bPagesTreeLoad = false; bool m_bPagesLoad = false; std::unique_ptr m_pFormAvail; std::vector> m_PagesArray; bool m_bTotalLoadPageTree = false; bool m_bCurPageDictLoadOK = false; bool m_bHeaderAvail = false; PageNode m_PageNode; std::set m_pageMapCheckState; std::set m_pagesLoadState; std::unique_ptr m_pHintTables; std::map> m_PagesObjAvail; std::map, std::unique_ptr, std::less<>> m_PagesResourcesAvail; }; #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_