1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 9 10 #include <algorithm> 11 #include <memory> 12 #include <vector> 13 14 #include "core/fxcrt/string_pool_template.h" 15 #include "core/fxcrt/weak_ptr.h" 16 17 class CPDF_CryptoHandler; 18 class CPDF_Dictionary; 19 class CPDF_IndirectObjectHolder; 20 class CPDF_Object; 21 class CPDF_ReadValidator; 22 class CPDF_Stream; 23 class IFX_SeekableReadStream; 24 25 class CPDF_SyntaxParser { 26 public: 27 enum class ParseType { kStrict, kLoose }; 28 29 CPDF_SyntaxParser(); 30 explicit CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool); 31 ~CPDF_SyntaxParser(); 32 33 void InitParser(const RetainPtr<IFX_SeekableReadStream>& pFileAccess, 34 uint32_t HeaderOffset); 35 36 void InitParserWithValidator(const RetainPtr<CPDF_ReadValidator>& pValidator, 37 uint32_t HeaderOffset); 38 GetPos()39 FX_FILESIZE GetPos() const { return m_Pos; } SetPos(FX_FILESIZE pos)40 void SetPos(FX_FILESIZE pos) { m_Pos = std::min(pos, m_FileLen); } 41 42 std::unique_ptr<CPDF_Object> GetObjectBody( 43 CPDF_IndirectObjectHolder* pObjList); 44 45 std::unique_ptr<CPDF_Object> GetIndirectObject( 46 CPDF_IndirectObjectHolder* pObjList, 47 ParseType parse_type); 48 49 ByteString GetKeyword(); 50 void ToNextLine(); 51 void ToNextWord(); 52 bool BackwardsSearchToWord(const ByteStringView& word, FX_FILESIZE limit); 53 FX_FILESIZE FindTag(const ByteStringView& tag, FX_FILESIZE limit); 54 bool ReadBlock(uint8_t* pBuf, uint32_t size); 55 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch); 56 ByteString GetNextWord(bool* bIsNumber); 57 ByteString PeekNextWord(bool* bIsNumber); 58 59 RetainPtr<IFX_SeekableReadStream> GetFileAccess() const; 60 GetValidator()61 const RetainPtr<CPDF_ReadValidator>& GetValidator() const { 62 return m_pFileAccess; 63 } 64 65 private: 66 friend class CPDF_Parser; 67 friend class CPDF_DataAvail; 68 friend class cpdf_syntax_parser_ReadHexString_Test; 69 70 static const int kParserMaxRecursionDepth = 64; 71 static int s_CurrentRecursionDepth; 72 73 uint32_t GetDirectNum(); 74 bool ReadBlockAt(FX_FILESIZE read_pos); 75 bool GetNextChar(uint8_t& ch); 76 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch); 77 void GetNextWordInternal(bool* bIsNumber); 78 bool IsWholeWord(FX_FILESIZE startpos, 79 FX_FILESIZE limit, 80 const ByteStringView& tag, 81 bool checkKeyword); 82 83 ByteString ReadString(); 84 ByteString ReadHexString(); 85 unsigned int ReadEOLMarkers(FX_FILESIZE pos); 86 std::unique_ptr<CPDF_Stream> ReadStream( 87 std::unique_ptr<CPDF_Dictionary> pDict); 88 89 bool IsPositionRead(FX_FILESIZE pos) const; 90 91 std::unique_ptr<CPDF_Object> GetObjectBodyInternal( 92 CPDF_IndirectObjectHolder* pObjList, 93 ParseType parse_type); 94 95 FX_FILESIZE m_Pos; 96 RetainPtr<CPDF_ReadValidator> m_pFileAccess; 97 FX_FILESIZE m_HeaderOffset; 98 FX_FILESIZE m_FileLen; 99 std::vector<uint8_t> m_pFileBuf; 100 FX_FILESIZE m_BufOffset; 101 uint8_t m_WordBuffer[257]; 102 uint32_t m_WordSize; 103 WeakPtr<ByteStringPool> m_pPool; 104 }; 105 106 #endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 107