1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_ 9 10 #include <memory> 11 #include <set> 12 #include <utility> 13 #include <vector> 14 15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 16 #include "core/fpdfapi/parser/cpdf_parser.h" 17 #include "core/fxcrt/fx_memory.h" 18 #include "core/fxcrt/observed_ptr.h" 19 #include "core/fxcrt/retain_ptr.h" 20 #include "core/fxcrt/span.h" 21 #include "core/fxcrt/unowned_ptr.h" 22 23 class CPDF_ReadValidator; 24 class CPDF_StreamAcc; 25 class IFX_SeekableReadStream; 26 class JBig2_DocumentContext; 27 28 class CPDF_Document : public Observable, 29 public CPDF_Parser::ParsedObjectsHolder { 30 public: 31 // Type from which the XFA extension can subclass itself. 32 class Extension { 33 public: 34 virtual ~Extension() = default; 35 virtual int GetPageCount() const = 0; 36 virtual uint32_t DeletePage(int page_index) = 0; 37 virtual bool ContainsExtensionForm() const = 0; 38 virtual bool ContainsExtensionFullForm() const = 0; 39 virtual bool ContainsExtensionForegroundForm() const = 0; 40 }; 41 42 class LinkListIface { 43 public: 44 // CPDF_Document merely helps manage the lifetime. 45 virtual ~LinkListIface() = default; 46 }; 47 48 class PageDataIface { 49 public: 50 PageDataIface(); 51 virtual ~PageDataIface(); 52 53 virtual void ClearStockFont() = 0; 54 virtual RetainPtr<CPDF_StreamAcc> GetFontFileStreamAcc( 55 RetainPtr<const CPDF_Stream> pFontStream) = 0; 56 virtual void MaybePurgeFontFileStreamAcc( 57 RetainPtr<CPDF_StreamAcc>&& pStreamAcc) = 0; 58 virtual void MaybePurgeImage(uint32_t objnum) = 0; 59 SetDocument(CPDF_Document * pDoc)60 void SetDocument(CPDF_Document* pDoc) { m_pDoc = pDoc; } 61 62 protected: GetDocument()63 CPDF_Document* GetDocument() const { return m_pDoc; } 64 65 private: 66 UnownedPtr<CPDF_Document> m_pDoc; 67 }; 68 69 class RenderDataIface { 70 public: 71 RenderDataIface(); 72 virtual ~RenderDataIface(); 73 SetDocument(CPDF_Document * pDoc)74 void SetDocument(CPDF_Document* pDoc) { m_pDoc = pDoc; } 75 76 protected: GetDocument()77 CPDF_Document* GetDocument() const { return m_pDoc; } 78 79 private: 80 UnownedPtr<CPDF_Document> m_pDoc; 81 }; 82 83 static constexpr int kPageMaxNum = 0xFFFFF; 84 85 static bool IsValidPageObject(const CPDF_Object* obj); 86 87 CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData, 88 std::unique_ptr<PageDataIface> pPageData); 89 ~CPDF_Document() override; 90 GetExtension()91 Extension* GetExtension() const { return m_pExtension.get(); } SetExtension(std::unique_ptr<Extension> pExt)92 void SetExtension(std::unique_ptr<Extension> pExt) { 93 m_pExtension = std::move(pExt); 94 } 95 GetParser()96 CPDF_Parser* GetParser() const { return m_pParser.get(); } GetRoot()97 const CPDF_Dictionary* GetRoot() const { return m_pRootDict.Get(); } GetMutableRoot()98 RetainPtr<CPDF_Dictionary> GetMutableRoot() { return m_pRootDict; } 99 RetainPtr<CPDF_Dictionary> GetInfo(); 100 RetainPtr<const CPDF_Array> GetFileIdentifier() const; 101 102 // Returns the object number for the deleted page, or 0 on failure. 103 uint32_t DeletePage(int iPage); 104 // `page_obj_num` is the return value from DeletePage(). If it is non-zero, 105 // and it is no longer used in the page tree, then replace the page object 106 // with a null object. 107 void SetPageToNullObject(uint32_t page_obj_num); 108 bool MovePages(pdfium::span<const int> page_indices, int dest_page_index); 109 110 int GetPageCount() const; 111 bool IsPageLoaded(int iPage) const; 112 RetainPtr<const CPDF_Dictionary> GetPageDictionary(int iPage); 113 RetainPtr<CPDF_Dictionary> GetMutablePageDictionary(int iPage); 114 int GetPageIndex(uint32_t objnum); 115 // When `get_owner_perms` is true, returns full permissions if unlocked by 116 // owner. 117 uint32_t GetUserPermissions(bool get_owner_perms) const; 118 119 // PageDataIface wrappers, try to avoid explicit getter calls. 120 RetainPtr<CPDF_StreamAcc> GetFontFileStreamAcc( 121 RetainPtr<const CPDF_Stream> pFontStream); 122 void MaybePurgeFontFileStreamAcc(RetainPtr<CPDF_StreamAcc>&& pStreamAcc); 123 void MaybePurgeImage(uint32_t objnum); 124 125 // Returns a valid pointer, unless it is called during destruction. GetPageData()126 PageDataIface* GetPageData() const { return m_pDocPage.get(); } GetRenderData()127 RenderDataIface* GetRenderData() const { return m_pDocRender.get(); } 128 129 void SetPageObjNum(int iPage, uint32_t objNum); 130 131 JBig2_DocumentContext* GetOrCreateCodecContext(); GetLinksContext()132 LinkListIface* GetLinksContext() const { return m_pLinksContext.get(); } SetLinksContext(std::unique_ptr<LinkListIface> pContext)133 void SetLinksContext(std::unique_ptr<LinkListIface> pContext) { 134 m_pLinksContext = std::move(pContext); 135 } 136 137 // Behaves like NewIndirect<CPDF_Stream>(dict), but keeps track of the object 138 // number assigned to the newly created stream. 139 RetainPtr<CPDF_Stream> CreateModifiedAPStream( 140 RetainPtr<CPDF_Dictionary> dict); 141 142 // Returns whether CreateModifiedAPStream() created `stream`. 143 bool IsModifiedAPStream(const CPDF_Stream* stream) const; 144 145 // CPDF_Parser::ParsedObjectsHolder: 146 bool TryInit() override; 147 RetainPtr<CPDF_Object> ParseIndirectObject(uint32_t objnum) override; 148 149 CPDF_Parser::Error LoadDoc(RetainPtr<IFX_SeekableReadStream> pFileAccess, 150 const ByteString& password); 151 CPDF_Parser::Error LoadLinearizedDoc(RetainPtr<CPDF_ReadValidator> validator, 152 const ByteString& password); has_valid_cross_reference_table()153 bool has_valid_cross_reference_table() const { 154 return m_bHasValidCrossReferenceTable; 155 } 156 157 void LoadPages(); 158 void CreateNewDoc(); 159 RetainPtr<CPDF_Dictionary> CreateNewPage(int iPage); 160 IncrementParsedPageCount()161 void IncrementParsedPageCount() { ++m_ParsedPageCount; } GetParsedPageCountForTesting()162 uint32_t GetParsedPageCountForTesting() { return m_ParsedPageCount; } 163 164 void SetRootForTesting(RetainPtr<CPDF_Dictionary> root); 165 166 protected: 167 void SetParser(std::unique_ptr<CPDF_Parser> pParser); 168 169 void ResizePageListForTesting(size_t size); 170 171 private: 172 class StockFontClearer { 173 public: 174 FX_STACK_ALLOCATED(); 175 176 explicit StockFontClearer(CPDF_Document::PageDataIface* pPageData); 177 ~StockFontClearer(); 178 179 private: 180 UnownedPtr<CPDF_Document::PageDataIface> const m_pPageData; 181 }; 182 183 // Retrieve page count information by getting count value from the tree nodes 184 int RetrievePageCount(); 185 186 // When this method is called, m_pTreeTraversal[level] exists. 187 RetainPtr<CPDF_Dictionary> TraversePDFPages(int iPage, 188 int* nPagesToGo, 189 size_t level); 190 191 RetainPtr<const CPDF_Dictionary> GetPagesDict() const; 192 RetainPtr<CPDF_Dictionary> GetMutablePagesDict(); 193 194 bool InsertDeletePDFPage(RetainPtr<CPDF_Dictionary> pages_dict, 195 int pages_to_go, 196 RetainPtr<CPDF_Dictionary> page_dict, 197 bool is_insert, 198 std::set<RetainPtr<CPDF_Dictionary>>* visited); 199 200 bool InsertNewPage(int iPage, RetainPtr<CPDF_Dictionary> pPageDict); 201 void ResetTraversal(); 202 CPDF_Parser::Error HandleLoadResult(CPDF_Parser::Error error); 203 204 std::unique_ptr<CPDF_Parser> m_pParser; 205 RetainPtr<CPDF_Dictionary> m_pRootDict; 206 RetainPtr<CPDF_Dictionary> m_pInfoDict; 207 208 // Vector of pairs to know current position in the page tree. The index in the 209 // vector corresponds to the level being described. The pair contains a 210 // pointer to the dictionary being processed at the level, and an index of the 211 // of the child being processed within the dictionary's /Kids array. 212 std::vector<std::pair<RetainPtr<CPDF_Dictionary>, size_t>> m_pTreeTraversal; 213 214 // True if the CPDF_Parser succeeded without having to rebuild the cross 215 // reference table. 216 bool m_bHasValidCrossReferenceTable = false; 217 218 // Index of the next page that will be traversed from the page tree. 219 bool m_bReachedMaxPageLevel = false; 220 int m_iNextPageToTraverse = 0; 221 uint32_t m_ParsedPageCount = 0; 222 223 std::unique_ptr<RenderDataIface> const m_pDocRender; 224 // Must be after `m_pDocRender`. 225 std::unique_ptr<PageDataIface> const m_pDocPage; 226 std::unique_ptr<JBig2_DocumentContext> m_pCodecContext; 227 std::unique_ptr<LinkListIface> m_pLinksContext; 228 std::set<uint32_t> m_ModifiedAPStreamIDs; 229 std::vector<uint32_t> m_PageList; // Page number to page's dict objnum. 230 231 // Must be second to last. 232 StockFontClearer m_StockFontClearer; 233 234 // Must be last. Destroy the extension before any non-extension teardown. 235 std::unique_ptr<Extension> m_pExtension; 236 }; 237 238 #endif // CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_ 239