1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 9 10 #include <stdint.h> 11 12 #include <memory> 13 #include <vector> 14 15 #include "core/fpdfapi/parser/cpdf_stream.h" 16 #include "core/fxcrt/data_vector.h" 17 #include "core/fxcrt/fx_types.h" 18 #include "core/fxcrt/retain_ptr.h" 19 #include "core/fxcrt/string_pool_template.h" 20 #include "core/fxcrt/unowned_ptr.h" 21 #include "core/fxcrt/weak_ptr.h" 22 #include "third_party/base/span.h" 23 24 class CPDF_Dictionary; 25 class CPDF_IndirectObjectHolder; 26 class CPDF_Object; 27 class CPDF_ReadValidator; 28 class CPDF_Stream; 29 class IFX_SeekableReadStream; 30 31 class CPDF_SyntaxParser { 32 public: 33 enum class ParseType : bool { kStrict, kLoose }; 34 35 struct WordResult { 36 ByteString word; 37 bool is_number; 38 }; 39 40 static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting( 41 RetainPtr<IFX_SeekableReadStream> pFileAccess, 42 FX_FILESIZE HeaderOffset); 43 44 explicit CPDF_SyntaxParser(RetainPtr<IFX_SeekableReadStream> pFileAccess); 45 CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> pValidator, 46 FX_FILESIZE HeaderOffset); 47 ~CPDF_SyntaxParser(); 48 SetReadBufferSize(uint32_t read_buffer_size)49 void SetReadBufferSize(uint32_t read_buffer_size) { 50 m_ReadBufferSize = read_buffer_size; 51 } 52 GetPos()53 FX_FILESIZE GetPos() const { return m_Pos; } 54 void SetPos(FX_FILESIZE pos); 55 56 RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList); 57 RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList, 58 ParseType parse_type); 59 60 ByteString GetKeyword(); 61 void ToNextLine(); 62 void ToNextWord(); 63 void RecordingToNextWord(); 64 bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit); 65 FX_FILESIZE FindTag(ByteStringView tag); 66 bool ReadBlock(pdfium::span<uint8_t> buffer); 67 bool GetCharAt(FX_FILESIZE pos, uint8_t& ch); 68 WordResult GetNextWord(); 69 ByteString PeekNextWord(); 70 71 RetainPtr<CPDF_ReadValidator> GetValidator() const; 72 uint32_t GetDirectNum(); 73 bool GetNextChar(uint8_t& ch); 74 75 // The document size may be smaller than the file size. 76 // The syntax parser use position relative to document 77 // offset (|m_HeaderOffset|). 78 // The document size will be FileSize - "Header offset". 79 // All offsets was readed from document, should not be great than document 80 // size. Use it for checks instead of real file size. 81 FX_FILESIZE GetDocumentSize() const; 82 83 ByteString ReadString(); 84 ByteString ReadHexString(); 85 SetTrailerEnds(std::vector<unsigned int> * trailer_ends)86 void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) { 87 m_TrailerEnds = trailer_ends; 88 } 89 90 private: 91 enum class WordType : bool { kWord, kNumber }; 92 93 friend class CPDF_DataAvail; 94 friend class cpdf_syntax_parser_ReadHexString_Test; 95 96 static constexpr int kParserMaxRecursionDepth = 64; 97 static int s_CurrentRecursionDepth; 98 99 bool ReadBlockAt(FX_FILESIZE read_pos); 100 bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch); 101 WordType GetNextWordInternal(); 102 bool IsWholeWord(FX_FILESIZE startpos, 103 FX_FILESIZE limit, 104 ByteStringView tag, 105 bool checkKeyword); 106 107 unsigned int ReadEOLMarkers(FX_FILESIZE pos); 108 FX_FILESIZE FindWordPos(ByteStringView word); 109 FX_FILESIZE FindStreamEndPos(); 110 RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict); 111 112 bool IsPositionRead(FX_FILESIZE pos) const; 113 114 RetainPtr<CPDF_Object> GetObjectBodyInternal( 115 CPDF_IndirectObjectHolder* pObjList, 116 ParseType parse_type); 117 118 RetainPtr<CPDF_ReadValidator> m_pFileAccess; 119 // The syntax parser use position relative to header offset. 120 // The header contains at file start, and can follow after some stuff. We 121 // ignore this stuff. 122 const FX_FILESIZE m_HeaderOffset; 123 const FX_FILESIZE m_FileLen; 124 FX_FILESIZE m_Pos = 0; 125 WeakPtr<ByteStringPool> m_pPool; 126 DataVector<uint8_t> m_pFileBuf; 127 FX_FILESIZE m_BufOffset = 0; 128 uint32_t m_WordSize = 0; 129 uint8_t m_WordBuffer[257] = {}; 130 uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize; 131 132 // The syntax parser records traversed trailer end byte offsets here. 133 UnownedPtr<std::vector<unsigned int>> m_TrailerEnds; 134 }; 135 136 #endif // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_ 137