1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 9 10 #include <stdint.h> 11 12 #include <array> 13 #include <memory> 14 #include <vector> 15 16 #include "core/fpdfapi/parser/cpdf_stream.h" 17 #include "core/fxcrt/data_vector.h" 18 #include "core/fxcrt/fx_types.h" 19 #include "core/fxcrt/retain_ptr.h" 20 #include "core/fxcrt/span.h" 21 #include "core/fxcrt/string_pool_template.h" 22 #include "core/fxcrt/unowned_ptr.h" 23 #include "core/fxcrt/weak_ptr.h" 24 25 class CPDF_Dictionary; 26 class CPDF_IndirectObjectHolder; 27 class CPDF_Object; 28 class CPDF_ReadValidator; 29 class CPDF_Stream; 30 class IFX_SeekableReadStream; 31 32 class CPDF_SyntaxParser { 33 public: 34 enum class ParseType : bool { kStrict, kLoose }; 35 36 struct WordResult { 37 ByteString word; 38 bool is_number; 39 }; 40 41 static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting( 42 RetainPtr<IFX_SeekableReadStream> pFileAccess, 43 FX_FILESIZE HeaderOffset); 44 45 explicit CPDF_SyntaxParser(RetainPtr<IFX_SeekableReadStream> pFileAccess); 46 CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> pValidator, 47 FX_FILESIZE HeaderOffset); 48 ~CPDF_SyntaxParser(); 49 SetReadBufferSize(uint32_t read_buffer_size)50 void SetReadBufferSize(uint32_t read_buffer_size) { 51 m_ReadBufferSize = read_buffer_size; 52 } 53 GetPos()54 FX_FILESIZE GetPos() const { return m_Pos; } 55 void SetPos(FX_FILESIZE pos); 56 57 RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList); 58 RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList, 59 ParseType parse_type); 60 61 ByteString GetKeyword(); 62 void ToNextLine(); 63 void ToNextWord(); 64 void RecordingToNextWord(); 65 bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit); 66 FX_FILESIZE FindTag(ByteStringView tag); 67 bool ReadBlock(pdfium::span<uint8_t> buffer); 68 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch); 69 WordResult GetNextWord(); 70 ByteString PeekNextWord(); 71 72 RetainPtr<CPDF_ReadValidator> GetValidator() const; 73 uint32_t GetDirectNum(); 74 bool GetNextChar(uint8_t& ch); 75 76 // The document size may be smaller than the file size. 77 // The syntax parser use position relative to document 78 // offset (|m_HeaderOffset|). 79 // The document size will be FileSize - "Header offset". 80 // All offsets was readed from document, should not be great than document 81 // size. Use it for checks instead of real file size. 82 FX_FILESIZE GetDocumentSize() const; 83 84 ByteString ReadString(); 85 DataVector<uint8_t> ReadHexString(); 86 SetTrailerEnds(std::vector<unsigned int> * trailer_ends)87 void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) { 88 m_TrailerEnds = trailer_ends; 89 } 90 91 private: 92 enum class WordType : bool { kWord, kNumber }; 93 94 friend class CPDF_DataAvail; 95 friend class cpdf_syntax_parser_ReadHexString_Test; 96 97 static constexpr int kParserMaxRecursionDepth = 64; 98 static int s_CurrentRecursionDepth; 99 100 bool ReadBlockAt(FX_FILESIZE read_pos); 101 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch); 102 WordType GetNextWordInternal(); 103 bool IsWholeWord(FX_FILESIZE startpos, 104 FX_FILESIZE limit, 105 ByteStringView tag, 106 bool checkKeyword); 107 108 unsigned int ReadEOLMarkers(FX_FILESIZE pos); 109 FX_FILESIZE FindWordPos(ByteStringView word); 110 FX_FILESIZE FindStreamEndPos(); 111 RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict); 112 113 bool IsPositionRead(FX_FILESIZE pos) const; 114 115 RetainPtr<CPDF_Object> GetObjectBodyInternal( 116 CPDF_IndirectObjectHolder* pObjList, 117 ParseType parse_type); 118 119 RetainPtr<CPDF_ReadValidator> m_pFileAccess; 120 // The syntax parser use position relative to header offset. 121 // The header contains at file start, and can follow after some stuff. We 122 // ignore this stuff. 123 const FX_FILESIZE m_HeaderOffset; 124 const FX_FILESIZE m_FileLen; 125 FX_FILESIZE m_Pos = 0; 126 WeakPtr<ByteStringPool> m_pPool; 127 DataVector<uint8_t> m_pFileBuf; 128 FX_FILESIZE m_BufOffset = 0; 129 uint32_t m_WordSize = 0; 130 uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize; 131 std::array<uint8_t, 257> m_WordBuffer = {}; 132 133 // The syntax parser records traversed trailer end byte offsets here. 134 UnownedPtr<std::vector<unsigned int>> m_TrailerEnds; 135 }; 136 137 #endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 138