1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 9 10 #include <memory> 11 #include <vector> 12 13 #include "core/fpdfapi/parser/cpdf_stream.h" 14 #include "core/fxcrt/string_pool_template.h" 15 #include "core/fxcrt/weak_ptr.h" 16 17 class CPDF_CryptoHandler; 18 class CPDF_Dictionary; 19 class CPDF_IndirectObjectHolder; 20 class CPDF_Object; 21 class CPDF_ReadValidator; 22 class CPDF_Stream; 23 class IFX_SeekableReadStream; 24 25 class CPDF_SyntaxParser { 26 public: 27 enum class ParseType { kStrict, kLoose }; 28 29 static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting( 30 const RetainPtr<IFX_SeekableReadStream>& pFileAccess, 31 FX_FILESIZE HeaderOffset); 32 33 explicit CPDF_SyntaxParser( 34 const RetainPtr<IFX_SeekableReadStream>& pFileAccess); 35 CPDF_SyntaxParser(const RetainPtr<CPDF_ReadValidator>& pValidator, 36 FX_FILESIZE HeaderOffset); 37 ~CPDF_SyntaxParser(); 38 SetReadBufferSize(uint32_t read_buffer_size)39 void SetReadBufferSize(uint32_t read_buffer_size) { 40 m_ReadBufferSize = read_buffer_size; 41 } 42 GetPos()43 FX_FILESIZE GetPos() const { return m_Pos; } 44 void SetPos(FX_FILESIZE pos); 45 46 RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList); 47 48 RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList, 49 ParseType parse_type); 50 51 ByteString GetKeyword(); 52 void ToNextLine(); 53 void ToNextWord(); 54 bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit); 55 FX_FILESIZE FindTag(ByteStringView tag); 56 bool ReadBlock(uint8_t* pBuf, uint32_t size); 57 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch); 58 ByteString GetNextWord(bool* bIsNumber); 59 ByteString PeekNextWord(bool* bIsNumber); 60 GetValidator()61 const RetainPtr<CPDF_ReadValidator>& GetValidator() const { 62 return m_pFileAccess; 63 } 64 uint32_t GetDirectNum(); 65 bool GetNextChar(uint8_t& ch); 66 67 // The document size may be smaller than the file size. 68 // The syntax parser use position relative to document 69 // offset (|m_HeaderOffset|). 70 // The document size will be FileSize - "Header offset". 71 // All offsets was readed from document, should not be great than document 72 // size. Use it for checks instead of real file size. 73 FX_FILESIZE GetDocumentSize() const; 74 75 ByteString ReadString(); 76 ByteString ReadHexString(); 77 78 private: 79 friend class CPDF_DataAvail; 80 friend class cpdf_syntax_parser_ReadHexString_Test; 81 82 static const int kParserMaxRecursionDepth = 64; 83 static int s_CurrentRecursionDepth; 84 85 bool ReadBlockAt(FX_FILESIZE read_pos); 86 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch); 87 void GetNextWordInternal(bool* bIsNumber); 88 bool IsWholeWord(FX_FILESIZE startpos, 89 FX_FILESIZE limit, 90 ByteStringView tag, 91 bool checkKeyword); 92 93 unsigned int ReadEOLMarkers(FX_FILESIZE pos); 94 FX_FILESIZE FindWordPos(ByteStringView word); 95 FX_FILESIZE FindStreamEndPos(); 96 RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict); 97 98 bool IsPositionRead(FX_FILESIZE pos) const; 99 100 RetainPtr<CPDF_Object> GetObjectBodyInternal( 101 CPDF_IndirectObjectHolder* pObjList, 102 ParseType parse_type); 103 104 RetainPtr<CPDF_ReadValidator> m_pFileAccess; 105 // The syntax parser use position relative to header offset. 106 // The header contains at file start, and can follow after some stuff. We 107 // ignore this stuff. 108 const FX_FILESIZE m_HeaderOffset; 109 const FX_FILESIZE m_FileLen; 110 FX_FILESIZE m_Pos = 0; 111 WeakPtr<ByteStringPool> m_pPool; 112 std::vector<uint8_t> m_pFileBuf; 113 FX_FILESIZE m_BufOffset = 0; 114 uint32_t m_WordSize = 0; 115 uint8_t m_WordBuffer[257]; 116 uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize; 117 }; 118 119 #endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 120