• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_
9 
10 #include <memory>
11 #include <set>
12 #include <utility>
13 #include <vector>
14 
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_parser.h"
17 #include "core/fxcrt/fx_memory.h"
18 #include "core/fxcrt/observed_ptr.h"
19 #include "core/fxcrt/retain_ptr.h"
20 #include "core/fxcrt/span.h"
21 #include "core/fxcrt/unowned_ptr.h"
22 
23 class CPDF_ReadValidator;
24 class CPDF_StreamAcc;
25 class IFX_SeekableReadStream;
26 class JBig2_DocumentContext;
27 
28 class CPDF_Document : public Observable,
29                       public CPDF_Parser::ParsedObjectsHolder {
30  public:
31   // Type from which the XFA extension can subclass itself.
32   class Extension {
33    public:
34     virtual ~Extension() = default;
35     virtual int GetPageCount() const = 0;
36     virtual uint32_t DeletePage(int page_index) = 0;
37     virtual bool ContainsExtensionForm() const = 0;
38     virtual bool ContainsExtensionFullForm() const = 0;
39     virtual bool ContainsExtensionForegroundForm() const = 0;
40   };
41 
42   class LinkListIface {
43    public:
44     // CPDF_Document merely helps manage the lifetime.
45     virtual ~LinkListIface() = default;
46   };
47 
48   class PageDataIface {
49    public:
50     PageDataIface();
51     virtual ~PageDataIface();
52 
53     virtual void ClearStockFont() = 0;
54     virtual RetainPtr<CPDF_StreamAcc> GetFontFileStreamAcc(
55         RetainPtr<const CPDF_Stream> pFontStream) = 0;
56     virtual void MaybePurgeFontFileStreamAcc(
57         RetainPtr<CPDF_StreamAcc>&& pStreamAcc) = 0;
58     virtual void MaybePurgeImage(uint32_t objnum) = 0;
59 
SetDocument(CPDF_Document * pDoc)60     void SetDocument(CPDF_Document* pDoc) { m_pDoc = pDoc; }
61 
62    protected:
GetDocument()63     CPDF_Document* GetDocument() const { return m_pDoc; }
64 
65    private:
66     UnownedPtr<CPDF_Document> m_pDoc;
67   };
68 
69   class RenderDataIface {
70    public:
71     RenderDataIface();
72     virtual ~RenderDataIface();
73 
SetDocument(CPDF_Document * pDoc)74     void SetDocument(CPDF_Document* pDoc) { m_pDoc = pDoc; }
75 
76    protected:
GetDocument()77     CPDF_Document* GetDocument() const { return m_pDoc; }
78 
79    private:
80     UnownedPtr<CPDF_Document> m_pDoc;
81   };
82 
83   static constexpr int kPageMaxNum = 0xFFFFF;
84 
85   static bool IsValidPageObject(const CPDF_Object* obj);
86 
87   CPDF_Document(std::unique_ptr<RenderDataIface> pRenderData,
88                 std::unique_ptr<PageDataIface> pPageData);
89   ~CPDF_Document() override;
90 
GetExtension()91   Extension* GetExtension() const { return m_pExtension.get(); }
SetExtension(std::unique_ptr<Extension> pExt)92   void SetExtension(std::unique_ptr<Extension> pExt) {
93     m_pExtension = std::move(pExt);
94   }
95 
GetParser()96   CPDF_Parser* GetParser() const { return m_pParser.get(); }
GetRoot()97   const CPDF_Dictionary* GetRoot() const { return m_pRootDict.Get(); }
GetMutableRoot()98   RetainPtr<CPDF_Dictionary> GetMutableRoot() { return m_pRootDict; }
99   RetainPtr<CPDF_Dictionary> GetInfo();
100   RetainPtr<const CPDF_Array> GetFileIdentifier() const;
101 
102   // Returns the object number for the deleted page, or 0 on failure.
103   uint32_t DeletePage(int iPage);
104   // `page_obj_num` is the return value from DeletePage(). If it is non-zero,
105   // and it is no longer used in the page tree, then replace the page object
106   // with a null object.
107   void SetPageToNullObject(uint32_t page_obj_num);
108   bool MovePages(pdfium::span<const int> page_indices, int dest_page_index);
109 
110   int GetPageCount() const;
111   bool IsPageLoaded(int iPage) const;
112   RetainPtr<const CPDF_Dictionary> GetPageDictionary(int iPage);
113   RetainPtr<CPDF_Dictionary> GetMutablePageDictionary(int iPage);
114   int GetPageIndex(uint32_t objnum);
115   // When `get_owner_perms` is true, returns full permissions if unlocked by
116   // owner.
117   uint32_t GetUserPermissions(bool get_owner_perms) const;
118 
119   // PageDataIface wrappers, try to avoid explicit getter calls.
120   RetainPtr<CPDF_StreamAcc> GetFontFileStreamAcc(
121       RetainPtr<const CPDF_Stream> pFontStream);
122   void MaybePurgeFontFileStreamAcc(RetainPtr<CPDF_StreamAcc>&& pStreamAcc);
123   void MaybePurgeImage(uint32_t objnum);
124 
125   // Returns a valid pointer, unless it is called during destruction.
GetPageData()126   PageDataIface* GetPageData() const { return m_pDocPage.get(); }
GetRenderData()127   RenderDataIface* GetRenderData() const { return m_pDocRender.get(); }
128 
129   void SetPageObjNum(int iPage, uint32_t objNum);
130 
131   JBig2_DocumentContext* GetOrCreateCodecContext();
GetLinksContext()132   LinkListIface* GetLinksContext() const { return m_pLinksContext.get(); }
SetLinksContext(std::unique_ptr<LinkListIface> pContext)133   void SetLinksContext(std::unique_ptr<LinkListIface> pContext) {
134     m_pLinksContext = std::move(pContext);
135   }
136 
137   // Behaves like NewIndirect<CPDF_Stream>(dict), but keeps track of the object
138   // number assigned to the newly created stream.
139   RetainPtr<CPDF_Stream> CreateModifiedAPStream(
140       RetainPtr<CPDF_Dictionary> dict);
141 
142   // Returns whether CreateModifiedAPStream() created `stream`.
143   bool IsModifiedAPStream(const CPDF_Stream* stream) const;
144 
145   // CPDF_Parser::ParsedObjectsHolder:
146   bool TryInit() override;
147   RetainPtr<CPDF_Object> ParseIndirectObject(uint32_t objnum) override;
148 
149   CPDF_Parser::Error LoadDoc(RetainPtr<IFX_SeekableReadStream> pFileAccess,
150                              const ByteString& password);
151   CPDF_Parser::Error LoadLinearizedDoc(RetainPtr<CPDF_ReadValidator> validator,
152                                        const ByteString& password);
has_valid_cross_reference_table()153   bool has_valid_cross_reference_table() const {
154     return m_bHasValidCrossReferenceTable;
155   }
156 
157   void LoadPages();
158   void CreateNewDoc();
159   RetainPtr<CPDF_Dictionary> CreateNewPage(int iPage);
160 
IncrementParsedPageCount()161   void IncrementParsedPageCount() { ++m_ParsedPageCount; }
GetParsedPageCountForTesting()162   uint32_t GetParsedPageCountForTesting() { return m_ParsedPageCount; }
163 
164   void SetRootForTesting(RetainPtr<CPDF_Dictionary> root);
165 
166  protected:
167   void SetParser(std::unique_ptr<CPDF_Parser> pParser);
168 
169   void ResizePageListForTesting(size_t size);
170 
171  private:
172   class StockFontClearer {
173    public:
174     FX_STACK_ALLOCATED();
175 
176     explicit StockFontClearer(CPDF_Document::PageDataIface* pPageData);
177     ~StockFontClearer();
178 
179    private:
180     UnownedPtr<CPDF_Document::PageDataIface> const m_pPageData;
181   };
182 
183   // Retrieve page count information by getting count value from the tree nodes
184   int RetrievePageCount();
185 
186   // When this method is called, m_pTreeTraversal[level] exists.
187   RetainPtr<CPDF_Dictionary> TraversePDFPages(int iPage,
188                                               int* nPagesToGo,
189                                               size_t level);
190 
191   RetainPtr<const CPDF_Dictionary> GetPagesDict() const;
192   RetainPtr<CPDF_Dictionary> GetMutablePagesDict();
193 
194   bool InsertDeletePDFPage(RetainPtr<CPDF_Dictionary> pages_dict,
195                            int pages_to_go,
196                            RetainPtr<CPDF_Dictionary> page_dict,
197                            bool is_insert,
198                            std::set<RetainPtr<CPDF_Dictionary>>* visited);
199 
200   bool InsertNewPage(int iPage, RetainPtr<CPDF_Dictionary> pPageDict);
201   void ResetTraversal();
202   CPDF_Parser::Error HandleLoadResult(CPDF_Parser::Error error);
203 
204   std::unique_ptr<CPDF_Parser> m_pParser;
205   RetainPtr<CPDF_Dictionary> m_pRootDict;
206   RetainPtr<CPDF_Dictionary> m_pInfoDict;
207 
208   // Vector of pairs to know current position in the page tree. The index in the
209   // vector corresponds to the level being described. The pair contains a
210   // pointer to the dictionary being processed at the level, and an index of the
211   // of the child being processed within the dictionary's /Kids array.
212   std::vector<std::pair<RetainPtr<CPDF_Dictionary>, size_t>> m_pTreeTraversal;
213 
214   // True if the CPDF_Parser succeeded without having to rebuild the cross
215   // reference table.
216   bool m_bHasValidCrossReferenceTable = false;
217 
218   // Index of the next page that will be traversed from the page tree.
219   bool m_bReachedMaxPageLevel = false;
220   int m_iNextPageToTraverse = 0;
221   uint32_t m_ParsedPageCount = 0;
222 
223   std::unique_ptr<RenderDataIface> const m_pDocRender;
224   // Must be after `m_pDocRender`.
225   std::unique_ptr<PageDataIface> const m_pDocPage;
226   std::unique_ptr<JBig2_DocumentContext> m_pCodecContext;
227   std::unique_ptr<LinkListIface> m_pLinksContext;
228   std::set<uint32_t> m_ModifiedAPStreamIDs;
229   std::vector<uint32_t> m_PageList;  // Page number to page's dict objnum.
230 
231   // Must be second to last.
232   StockFontClearer m_StockFontClearer;
233 
234   // Must be last. Destroy the extension before any non-extension teardown.
235   std::unique_ptr<Extension> m_pExtension;
236 };
237 
238 #endif  // CORE_FPDFAPI_PARSER_CPDF_DOCUMENT_H_
239