// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
9 
10 #include <memory>
11 #include <set>
12 #include <vector>
13 
14 #include "core/fpdfapi/parser/cpdf_parser.h"
15 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
16 #include "core/fxcrt/fx_basic.h"
17 
// Forward declarations for types this header only names through pointers,
// references or smart-pointer template arguments.
class CPDF_Dictionary;
class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
class CPDF_LinearizedHeader;
class CPDF_Parser;
23 
// Type of CPDF_DataAvail::m_docStatus.
// Only the first enumerator has an explicit value; the rest are numbered
// implicitly in declaration order (0..20), so inserting or reordering entries
// changes every value that follows.
// NOTE(review): the names suggest these are the stages of the document
// availability check -- confirm against the implementation file.
enum PDF_DATAAVAIL_STATUS {
  PDF_DATAAVAIL_HEADER = 0,
  PDF_DATAAVAIL_FIRSTPAGE,
  PDF_DATAAVAIL_HINTTABLE,
  PDF_DATAAVAIL_END,
  PDF_DATAAVAIL_CROSSREF,
  PDF_DATAAVAIL_CROSSREF_ITEM,
  PDF_DATAAVAIL_CROSSREF_STREAM,
  PDF_DATAAVAIL_TRAILER,
  PDF_DATAAVAIL_LOADALLCROSSREF,
  PDF_DATAAVAIL_ROOT,
  PDF_DATAAVAIL_INFO,
  PDF_DATAAVAIL_ACROFORM,
  PDF_DATAAVAIL_ACROFORM_SUBOBJECT,
  PDF_DATAAVAIL_PAGETREE,
  PDF_DATAAVAIL_PAGE,
  PDF_DATAAVAIL_PAGE_LATERLOAD,
  PDF_DATAAVAIL_RESOURCES,
  PDF_DATAAVAIL_DONE,
  PDF_DATAAVAIL_ERROR,
  PDF_DATAAVAIL_LOADALLFILE,
  PDF_DATAAVAIL_TRAILER_APPEND
};
47 
// Kind tag stored in CPDF_DataAvail::PageNode::m_type.
// Values are numbered implicitly from 0 in declaration order.
enum PDF_PAGENODE_TYPE {
  PDF_PAGENODE_UNKNOWN = 0,
  PDF_PAGENODE_PAGE,
  PDF_PAGENODE_PAGES,
  PDF_PAGENODE_ARRAY,
};
54 
55 class CPDF_DataAvail final {
56  public:
57   // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
58   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
59   // to make sure the two sets of values match.
60   enum DocAvailStatus {
61     DataError = -1,        // PDF_DATA_ERROR
62     DataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
63     DataAvailable = 1,     // PDF_DATA_AVAIL
64   };
65 
66   // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
67   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
68   // to make sure the two sets of values match.
69   enum DocLinearizationStatus {
70     LinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
71     NotLinearized = 0,          // PDF_NOT_LINEARIZED
72     Linearized = 1,             // PDF_LINEARIZED
73   };
74 
75   // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
76   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
77   // to make sure the two sets of values match.
78   enum DocFormStatus {
79     FormError = -1,        // PDF_FORM_ERROR
80     FormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
81     FormAvailable = 1,     // PDF_FORM_AVAIL
82     FormNotExist = 2,      // PDF_FORM_NOTEXIST
83   };
84 
85   class FileAvail {
86    public:
87     virtual ~FileAvail();
88     virtual bool IsDataAvail(FX_FILESIZE offset, uint32_t size) = 0;
89   };
90 
91   class DownloadHints {
92    public:
93     virtual ~DownloadHints();
94     virtual void AddSegment(FX_FILESIZE offset, uint32_t size) = 0;
95   };
96 
97   CPDF_DataAvail(FileAvail* pFileAvail,
98                  const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
99                  bool bSupportHintTable);
100   ~CPDF_DataAvail();
101 
102   bool IsDataAvail(FX_FILESIZE offset, uint32_t size, DownloadHints* pHints);
103   DocAvailStatus IsDocAvail(DownloadHints* pHints);
104   void SetDocument(CPDF_Document* pDoc);
105   DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
106   DocFormStatus IsFormAvail(DownloadHints* pHints);
107   DocLinearizationStatus IsLinearizedPDF();
108   bool IsLinearized();
109   void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, uint32_t* pSize);
GetFileRead()110   CFX_RetainPtr<IFX_SeekableReadStream> GetFileRead() const {
111     return m_pFileRead;
112   }
113   int GetPageCount() const;
114   CPDF_Dictionary* GetPage(int index);
115 
116  protected:
117   class PageNode {
118    public:
119     PageNode();
120     ~PageNode();
121 
122     PDF_PAGENODE_TYPE m_type;
123     uint32_t m_dwPageNo;
124     std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
125   };
126 
127   static const int kMaxDataAvailRecursionDepth = 64;
128   static int s_CurrentDataAvailRecursionDepth;
129   static const int kMaxPageRecursionDepth = 1024;
130 
131   uint32_t GetObjectSize(uint32_t objnum, FX_FILESIZE& offset);
132   bool AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array,
133                            bool bParsePage,
134                            DownloadHints* pHints,
135                            std::vector<CPDF_Object*>& ret_array);
136   bool CheckDocStatus(DownloadHints* pHints);
137   bool CheckHeader(DownloadHints* pHints);
138   bool CheckFirstPage(DownloadHints* pHints);
139   bool CheckHintTables(DownloadHints* pHints);
140   bool CheckEnd(DownloadHints* pHints);
141   bool CheckCrossRef(DownloadHints* pHints);
142   bool CheckCrossRefItem(DownloadHints* pHints);
143   bool CheckTrailer(DownloadHints* pHints);
144   bool CheckRoot(DownloadHints* pHints);
145   bool CheckInfo(DownloadHints* pHints);
146   bool CheckPages(DownloadHints* pHints);
147   bool CheckPage(DownloadHints* pHints);
148   bool CheckResources(DownloadHints* pHints);
149   bool CheckAnnots(DownloadHints* pHints);
150   bool CheckAcroForm(DownloadHints* pHints);
151   bool CheckAcroFormSubObject(DownloadHints* pHints);
152   bool CheckTrailerAppend(DownloadHints* pHints);
153   bool CheckPageStatus(DownloadHints* pHints);
154   bool CheckAllCrossRefStream(DownloadHints* pHints);
155 
156   int32_t CheckCrossRefStream(DownloadHints* pHints, FX_FILESIZE& xref_offset);
157   bool IsLinearizedFile(uint8_t* pData, uint32_t dwLen);
158   void SetStartOffset(FX_FILESIZE dwOffset);
159   bool GetNextToken(CFX_ByteString& token);
160   bool GetNextChar(uint8_t& ch);
161   std::unique_ptr<CPDF_Object> ParseIndirectObjectAt(
162       FX_FILESIZE pos,
163       uint32_t objnum,
164       CPDF_IndirectObjectHolder* pObjList = nullptr);
165   std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum,
166                                          DownloadHints* pHints,
167                                          bool* pExistInFile);
168   bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
169   bool PreparePageItem();
170   bool LoadPages(DownloadHints* pHints);
171   bool LoadAllXref(DownloadHints* pHints);
172   bool LoadAllFile(DownloadHints* pHints);
173   DocAvailStatus CheckLinearizedData(DownloadHints* pHints);
174   bool CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints);
175 
176   DocAvailStatus CheckLinearizedFirstPage(uint32_t dwPage,
177                                           DownloadHints* pHints);
178   bool HaveResourceAncestor(CPDF_Dictionary* pDict);
179   bool CheckPage(uint32_t dwPage, DownloadHints* pHints);
180   bool LoadDocPages(DownloadHints* pHints);
181   bool LoadDocPage(uint32_t dwPage, DownloadHints* pHints);
182   bool CheckPageNode(const PageNode& pageNode,
183                      int32_t iPage,
184                      int32_t& iCount,
185                      DownloadHints* pHints,
186                      int level);
187   bool CheckUnknownPageNode(uint32_t dwPageNo,
188                             PageNode* pPageNode,
189                             DownloadHints* pHints);
190   bool CheckArrayPageNode(uint32_t dwPageNo,
191                           PageNode* pPageNode,
192                           DownloadHints* pHints);
193   bool CheckPageCount(DownloadHints* pHints);
194   bool IsFirstCheck(uint32_t dwPage);
195   void ResetFirstCheck(uint32_t dwPage);
196   bool ValidatePage(uint32_t dwPage);
197   bool ValidateForm();
198 
199   FileAvail* const m_pFileAvail;
200   CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead;
201   CPDF_Parser m_parser;
202   CPDF_SyntaxParser m_syntaxParser;
203   std::unique_ptr<CPDF_Object> m_pRoot;
204   uint32_t m_dwRootObjNum;
205   uint32_t m_dwInfoObjNum;
206   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
207   CPDF_Object* m_pTrailer;
208   bool m_bDocAvail;
209   FX_FILESIZE m_dwHeaderOffset;
210   FX_FILESIZE m_dwLastXRefOffset;
211   FX_FILESIZE m_dwXRefOffset;
212   FX_FILESIZE m_dwTrailerOffset;
213   FX_FILESIZE m_dwCurrentOffset;
214   PDF_DATAAVAIL_STATUS m_docStatus;
215   FX_FILESIZE m_dwFileLen;
216   CPDF_Document* m_pDocument;
217   std::set<uint32_t> m_ObjectSet;
218   std::vector<CPDF_Object*> m_objs_array;
219   FX_FILESIZE m_Pos;
220   FX_FILESIZE m_bufferOffset;
221   uint32_t m_bufferSize;
222   CFX_ByteString m_WordBuf;
223   uint8_t m_bufferData[512];
224   std::vector<uint32_t> m_XRefStreamList;
225   std::vector<uint32_t> m_PageObjList;
226   uint32_t m_PagesObjNum;
227   bool m_bLinearedDataOK;
228   bool m_bMainXRefLoadTried;
229   bool m_bMainXRefLoadedOK;
230   bool m_bPagesTreeLoad;
231   bool m_bPagesLoad;
232   CPDF_Parser* m_pCurrentParser;
233   FX_FILESIZE m_dwCurrentXRefSteam;
234   bool m_bAnnotsLoad;
235   bool m_bHaveAcroForm;
236   uint32_t m_dwAcroFormObjNum;
237   bool m_bAcroFormLoad;
238   CPDF_Object* m_pAcroForm;
239   std::vector<CPDF_Object*> m_arrayAcroforms;
240   CPDF_Dictionary* m_pPageDict;
241   CPDF_Object* m_pPageResource;
242   bool m_bNeedDownLoadResource;
243   bool m_bPageLoadedOK;
244   bool m_bLinearizedFormParamLoad;
245   std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray;
246   uint32_t m_dwEncryptObjNum;
247   FX_FILESIZE m_dwPrevXRefOffset;
248   bool m_bTotalLoadPageTree;
249   bool m_bCurPageDictLoadOK;
250   PageNode m_PageNode;
251   std::set<uint32_t> m_pageMapCheckState;
252   std::set<uint32_t> m_pagesLoadState;
253   std::unique_ptr<CPDF_HintTables> m_pHintTables;
254   bool m_bSupportHintTable;
255 };
256 
257 #endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
258