1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 9 10 #include <map> 11 #include <memory> 12 #include <set> 13 #include <vector> 14 15 #include "core/fxcrt/fx_basic.h" 16 17 class CPDF_Array; 18 class CPDF_CryptoHandler; 19 class CPDF_Dictionary; 20 class CPDF_Document; 21 class CPDF_IndirectObjectHolder; 22 class CPDF_LinearizedHeader; 23 class CPDF_Object; 24 class CPDF_SecurityHandler; 25 class CPDF_StreamAcc; 26 class CPDF_SyntaxParser; 27 class IFX_SeekableReadStream; 28 29 class CPDF_Parser { 30 public: 31 enum Error { 32 SUCCESS = 0, 33 FILE_ERROR, 34 FORMAT_ERROR, 35 PASSWORD_ERROR, 36 HANDLER_ERROR 37 }; 38 39 // A limit on the maximum object number in the xref table. Theoretical limits 40 // are higher, but this may be large enough in practice. 41 static const uint32_t kMaxObjectNumber = 1048576; 42 43 CPDF_Parser(); 44 ~CPDF_Parser(); 45 46 Error StartParse(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile, 47 CPDF_Document* pDocument); 48 Error StartLinearizedParse(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile, 49 CPDF_Document* pDocument); 50 SetPassword(const FX_CHAR * password)51 void SetPassword(const FX_CHAR* password) { m_Password = password; } GetPassword()52 CFX_ByteString GetPassword() { return m_Password; } GetTrailer()53 CPDF_Dictionary* GetTrailer() const { return m_pTrailer.get(); } GetLastXRefOffset()54 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } 55 56 uint32_t GetPermissions() const; 57 uint32_t GetRootObjNum(); 58 uint32_t GetInfoObjNum(); 59 CPDF_Array* GetIDArray(); 60 GetEncryptDict()61 CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict; } 62 63 std::unique_ptr<CPDF_Object> ParseIndirectObject( 64 CPDF_IndirectObjectHolder* pObjList, 65 uint32_t objnum); 66 67 uint32_t GetLastObjNum() const; 68 bool IsValidObjectNumber(uint32_t objnum) const; 69 FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; 70 uint8_t GetObjectType(uint32_t objnum) const; 71 uint16_t GetObjectGenNum(uint32_t objnum) const; IsVersionUpdated()72 bool IsVersionUpdated() const { return m_bVersionUpdated; } 73 bool IsObjectFreeOrNull(uint32_t objnum) const; 74 CPDF_CryptoHandler* GetCryptoHandler(); 75 CFX_RetainPtr<IFX_SeekableReadStream> GetFileAccess() const; 76 77 FX_FILESIZE GetObjectOffset(uint32_t objnum) const; 78 FX_FILESIZE GetObjectSize(uint32_t objnum) const; 79 80 void GetIndirectBinary(uint32_t objnum, uint8_t*& pBuffer, uint32_t& size); GetFileVersion()81 int GetFileVersion() const { return m_FileVersion; } IsXRefStream()82 bool IsXRefStream() const { return m_bXRefStream; } 83 84 std::unique_ptr<CPDF_Object> ParseIndirectObjectAt( 85 CPDF_IndirectObjectHolder* pObjList, 86 FX_FILESIZE pos, 87 uint32_t objnum); 88 89 std::unique_ptr<CPDF_Object> ParseIndirectObjectAtByStrict( 90 CPDF_IndirectObjectHolder* pObjList, 91 FX_FILESIZE pos, 92 uint32_t objnum, 93 FX_FILESIZE* pResultPos); 94 95 uint32_t GetFirstPageNo() const; 96 97 protected: 98 struct ObjectInfo { ObjectInfoObjectInfo99 ObjectInfo() : pos(0), type(0), gennum(0) {} 100 101 FX_FILESIZE pos; 102 uint8_t type; 103 uint16_t gennum; 104 }; 105 106 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; 107 std::map<uint32_t, ObjectInfo> m_ObjectInfo; 108 109 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, bool bSkip); 110 bool RebuildCrossRef(); 111 112 private: 113 friend class CPDF_DataAvail; 114 115 enum class ParserState { 116 kDefault, 117 kComment, 118 kWhitespace, 119 kString, 120 kHexString, 121 kEscapedString, 122 kXref, 123 kObjNum, 124 kPostObjNum, 125 kGenNum, 126 kPostGenNum, 127 kTrailer, 128 kBeginObj, 129 kEndObj 130 }; 131 132 CPDF_Object* ParseDirect(CPDF_Object* pObj); 133 bool LoadAllCrossRefV4(FX_FILESIZE pos); 134 bool LoadAllCrossRefV5(FX_FILESIZE pos); 135 bool LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef); 136 std::unique_ptr<CPDF_Dictionary> LoadTrailerV4(); 137 Error SetEncryptHandler(); 138 void ReleaseEncryptHandler(); 139 bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); 140 bool LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); 141 bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); 142 Error LoadLinearizedMainXRefTable(); 143 CPDF_StreamAcc* GetObjectStream(uint32_t number); 144 bool IsLinearizedFile( 145 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess, 146 uint32_t offset); 147 void SetEncryptDictionary(CPDF_Dictionary* pDict); 148 void ShrinkObjectMap(uint32_t size); 149 // A simple check whether the cross reference table matches with 150 // the objects. 151 bool VerifyCrossRefV4(); 152 153 CPDF_Document* m_pDocument; // not owned 154 bool m_bHasParsed; 155 bool m_bXRefStream; 156 bool m_bVersionUpdated; 157 int m_FileVersion; 158 CPDF_Dictionary* m_pEncryptDict; 159 FX_FILESIZE m_LastXRefOffset; 160 std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler; 161 CFX_ByteString m_Password; 162 std::set<FX_FILESIZE> m_SortedOffset; 163 std::unique_ptr<CPDF_Dictionary> m_pTrailer; 164 std::vector<std::unique_ptr<CPDF_Dictionary>> m_Trailers; 165 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 166 uint32_t m_dwXrefStartObjNum; 167 168 // A map of object numbers to indirect streams. Map owns the streams. 169 std::map<uint32_t, std::unique_ptr<CPDF_StreamAcc>> m_ObjectStreamMap; 170 171 // Mapping of object numbers to offsets. The offsets are relative to the first 172 // object in the stream. 173 using StreamObjectCache = std::map<uint32_t, uint32_t>; 174 175 // Mapping of streams to their object caches. This is valid as long as the 176 // streams in |m_ObjectStreamMap| are valid. 177 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; 178 179 // All indirect object numbers that are being parsed. 180 std::set<uint32_t> m_ParsingObjNums; 181 }; 182 183 #endif // CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 184