1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 9 10 #include <limits> 11 #include <map> 12 #include <memory> 13 #include <set> 14 #include <vector> 15 16 #include "core/fpdfapi/parser/cpdf_cross_ref_table.h" 17 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" 18 #include "core/fxcrt/fx_string.h" 19 #include "core/fxcrt/fx_system.h" 20 #include "core/fxcrt/retain_ptr.h" 21 #include "core/fxcrt/unowned_ptr.h" 22 23 class CPDF_Array; 24 class CPDF_CryptoHandler; 25 class CPDF_Dictionary; 26 class CPDF_LinearizedHeader; 27 class CPDF_Object; 28 class CPDF_ObjectStream; 29 class CPDF_ReadValidator; 30 class CPDF_SecurityHandler; 31 class CPDF_SyntaxParser; 32 class IFX_SeekableReadStream; 33 34 class CPDF_Parser { 35 public: 36 class ParsedObjectsHolder : public CPDF_IndirectObjectHolder { 37 public: 38 virtual bool TryInit() = 0; 39 }; 40 41 enum Error { 42 SUCCESS = 0, 43 FILE_ERROR, 44 FORMAT_ERROR, 45 PASSWORD_ERROR, 46 HANDLER_ERROR 47 }; 48 49 // A limit on the maximum object number in the xref table. Theoretical limits 50 // are higher, but this may be large enough in practice. 51 // Note: This was 1M, but https://crbug.com/910009 encountered a PDF with 52 // object numbers in the 1.7M range. The PDF only has 10K objects, but they 53 // are non-consecutive. 54 static constexpr uint32_t kMaxObjectNumber = 4 * 1024 * 1024; 55 56 static const size_t kInvalidPos = std::numeric_limits<size_t>::max(); 57 58 explicit CPDF_Parser(ParsedObjectsHolder* holder); 59 CPDF_Parser(); 60 ~CPDF_Parser(); 61 62 Error StartParse(const RetainPtr<IFX_SeekableReadStream>& pFile, 63 const char* password); 64 Error StartLinearizedParse(const RetainPtr<CPDF_ReadValidator>& validator, 65 const char* password); 66 SetPassword(const char * password)67 void SetPassword(const char* password) { m_Password = password; } GetPassword()68 ByteString GetPassword() const { return m_Password; } 69 70 // Take the GetPassword() value and encode it, if necessary, based on the 71 // password encoding conversion. 72 ByteString GetEncodedPassword() const; 73 74 const CPDF_Dictionary* GetTrailer() const; 75 CPDF_Dictionary* GetMutableTrailerForTesting(); 76 77 // Returns a new trailer which combines the last read trailer with the /Root 78 // and /Info from previous ones. 79 RetainPtr<CPDF_Dictionary> GetCombinedTrailer() const; 80 GetLastXRefOffset()81 FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; } 82 83 uint32_t GetPermissions() const; 84 uint32_t GetRootObjNum() const; 85 uint32_t GetInfoObjNum() const; 86 const CPDF_Array* GetIDArray() const; 87 CPDF_Dictionary* GetRoot() const; 88 89 const CPDF_Dictionary* GetEncryptDict() const; 90 91 RetainPtr<CPDF_Object> ParseIndirectObject(uint32_t objnum); 92 93 uint32_t GetLastObjNum() const; 94 bool IsValidObjectNumber(uint32_t objnum) const; 95 FX_FILESIZE GetObjectPositionOrZero(uint32_t objnum) const; 96 bool IsObjectFreeOrNull(uint32_t objnum) const; GetSecurityHandler()97 const RetainPtr<CPDF_SecurityHandler>& GetSecurityHandler() const { 98 return m_pSecurityHandler; 99 } 100 bool IsObjectFree(uint32_t objnum) const; 101 GetFileVersion()102 int GetFileVersion() const { return m_FileVersion; } IsXRefStream()103 bool IsXRefStream() const { return m_bXRefStream; } 104 105 RetainPtr<CPDF_Object> ParseIndirectObjectAt(FX_FILESIZE pos, 106 uint32_t objnum); 107 108 uint32_t GetFirstPageNo() const; GetLinearizedHeader()109 const CPDF_LinearizedHeader* GetLinearizedHeader() const { 110 return m_pLinearized.get(); 111 } 112 GetCrossRefTable()113 const CPDF_CrossRefTable* GetCrossRefTable() const { 114 return m_CrossRefTable.get(); 115 } 116 xref_table_rebuilt()117 bool xref_table_rebuilt() const { return m_bXRefTableRebuilt; } 118 GetSyntax()119 CPDF_SyntaxParser* GetSyntax() const { return m_pSyntax.get(); } 120 121 void SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized); 122 123 protected: 124 using ObjectType = CPDF_CrossRefTable::ObjectType; 125 using ObjectInfo = CPDF_CrossRefTable::ObjectInfo; 126 127 bool LoadCrossRefV4(FX_FILESIZE pos, bool bSkip); 128 bool RebuildCrossRef(); 129 130 std::unique_ptr<CPDF_SyntaxParser> m_pSyntax; 131 132 private: 133 friend class cpdf_parser_BadStartXrefShouldNotBuildCrossRefTable_Test; 134 friend class cpdf_parser_ParseStartXRefWithHeaderOffset_Test; 135 friend class cpdf_parser_ParseStartXRef_Test; 136 friend class cpdf_parser_ParseLinearizedWithHeaderOffset_Test; 137 friend class CPDF_DataAvail; 138 139 struct CrossRefObjData { 140 uint32_t obj_num = 0; 141 ObjectInfo info; 142 }; 143 144 Error StartParseInternal(); 145 FX_FILESIZE ParseStartXRef(); 146 bool LoadAllCrossRefV4(FX_FILESIZE xref_offset); 147 bool LoadAllCrossRefV5(FX_FILESIZE xref_offset); 148 bool LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef); 149 RetainPtr<CPDF_Dictionary> LoadTrailerV4(); 150 Error SetEncryptHandler(); 151 void ReleaseEncryptHandler(); 152 bool LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset); 153 bool LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset); 154 Error LoadLinearizedMainXRefTable(); 155 const CPDF_ObjectStream* GetObjectStream(uint32_t object_number); 156 std::unique_ptr<CPDF_LinearizedHeader> ParseLinearizedHeader(); 157 void ShrinkObjectMap(uint32_t size); 158 // A simple check whether the cross reference table matches with 159 // the objects. 160 bool VerifyCrossRefV4(); 161 162 // If out_objects is null, the parser position will be moved to end subsection 163 // without additional validation. 164 bool ParseAndAppendCrossRefSubsectionData( 165 uint32_t start_objnum, 166 uint32_t count, 167 std::vector<CrossRefObjData>* out_objects); 168 bool ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects); 169 void MergeCrossRefObjectsData(const std::vector<CrossRefObjData>& objects); 170 171 bool InitSyntaxParser(const RetainPtr<CPDF_ReadValidator>& validator); 172 bool ParseFileVersion(); 173 174 ObjectType GetObjectType(uint32_t objnum) const; 175 ObjectType GetObjectTypeFromCrossRefStreamType( 176 uint32_t cross_ref_stream_type) const; 177 178 std::unique_ptr<ParsedObjectsHolder> m_pOwnedObjectsHolder; 179 UnownedPtr<ParsedObjectsHolder> m_pObjectsHolder; 180 181 bool m_bHasParsed = false; 182 bool m_bXRefStream = false; 183 bool m_bXRefTableRebuilt = false; 184 int m_FileVersion = 0; 185 // m_CrossRefTable must be destroyed after m_pSecurityHandler due to the 186 // ownership of the ID array data. 187 std::unique_ptr<CPDF_CrossRefTable> m_CrossRefTable; 188 FX_FILESIZE m_LastXRefOffset; 189 RetainPtr<CPDF_SecurityHandler> m_pSecurityHandler; 190 ByteString m_Password; 191 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 192 193 // A map of object numbers to indirect streams. 194 std::map<uint32_t, std::unique_ptr<CPDF_ObjectStream>> m_ObjectStreamMap; 195 196 // All indirect object numbers that are being parsed. 197 std::set<uint32_t> m_ParsingObjNums; 198 199 uint32_t m_MetadataObjnum = 0; 200 }; 201 202 #endif // CORE_FPDFAPI_PARSER_CPDF_PARSER_H_ 203