• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9 
10 #include <stdint.h>
11 
12 #include <array>
13 #include <memory>
14 #include <vector>
15 
16 #include "core/fpdfapi/parser/cpdf_stream.h"
17 #include "core/fxcrt/data_vector.h"
18 #include "core/fxcrt/fx_types.h"
19 #include "core/fxcrt/retain_ptr.h"
20 #include "core/fxcrt/span.h"
21 #include "core/fxcrt/string_pool_template.h"
22 #include "core/fxcrt/unowned_ptr.h"
23 #include "core/fxcrt/weak_ptr.h"
24 
25 class CPDF_Dictionary;
26 class CPDF_IndirectObjectHolder;
27 class CPDF_Object;
28 class CPDF_ReadValidator;
29 class CPDF_Stream;
30 class IFX_SeekableReadStream;
31 
32 class CPDF_SyntaxParser {
33  public:
34   enum class ParseType : bool { kStrict, kLoose };
35 
36   struct WordResult {
37     ByteString word;
38     bool is_number;
39   };
40 
41   static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting(
42       RetainPtr<IFX_SeekableReadStream> pFileAccess,
43       FX_FILESIZE HeaderOffset);
44 
45   explicit CPDF_SyntaxParser(RetainPtr<IFX_SeekableReadStream> pFileAccess);
46   CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> pValidator,
47                     FX_FILESIZE HeaderOffset);
48   ~CPDF_SyntaxParser();
49 
SetReadBufferSize(uint32_t read_buffer_size)50   void SetReadBufferSize(uint32_t read_buffer_size) {
51     m_ReadBufferSize = read_buffer_size;
52   }
53 
GetPos()54   FX_FILESIZE GetPos() const { return m_Pos; }
55   void SetPos(FX_FILESIZE pos);
56 
57   RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList);
58   RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList,
59                                            ParseType parse_type);
60 
61   ByteString GetKeyword();
62   void ToNextLine();
63   void ToNextWord();
64   void RecordingToNextWord();
65   bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
66   FX_FILESIZE FindTag(ByteStringView tag);
67   bool ReadBlock(pdfium::span<uint8_t> buffer);
68   bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
69   WordResult GetNextWord();
70   ByteString PeekNextWord();
71 
72   RetainPtr<CPDF_ReadValidator> GetValidator() const;
73   uint32_t GetDirectNum();
74   bool GetNextChar(uint8_t& ch);
75 
76   // The document size may be smaller than the file size.
77   // The syntax parser use position relative to document
78   // offset (|m_HeaderOffset|).
79   // The document size will be FileSize - "Header offset".
80   // All offsets was readed from document, should not be great than document
81   // size. Use it for checks instead of real file size.
82   FX_FILESIZE GetDocumentSize() const;
83 
84   ByteString ReadString();
85   DataVector<uint8_t> ReadHexString();
86 
SetTrailerEnds(std::vector<unsigned int> * trailer_ends)87   void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
88     m_TrailerEnds = trailer_ends;
89   }
90 
91  private:
92   enum class WordType : bool { kWord, kNumber };
93 
94   friend class CPDF_DataAvail;
95   friend class cpdf_syntax_parser_ReadHexString_Test;
96 
97   static constexpr int kParserMaxRecursionDepth = 64;
98   static int s_CurrentRecursionDepth;
99 
100   bool ReadBlockAt(FX_FILESIZE read_pos);
101   bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
102   WordType GetNextWordInternal();
103   bool IsWholeWord(FX_FILESIZE startpos,
104                    FX_FILESIZE limit,
105                    ByteStringView tag,
106                    bool checkKeyword);
107 
108   unsigned int ReadEOLMarkers(FX_FILESIZE pos);
109   FX_FILESIZE FindWordPos(ByteStringView word);
110   FX_FILESIZE FindStreamEndPos();
111   RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict);
112 
113   bool IsPositionRead(FX_FILESIZE pos) const;
114 
115   RetainPtr<CPDF_Object> GetObjectBodyInternal(
116       CPDF_IndirectObjectHolder* pObjList,
117       ParseType parse_type);
118 
119   RetainPtr<CPDF_ReadValidator> m_pFileAccess;
120   // The syntax parser use position relative to header offset.
121   // The header contains at file start, and can follow after some stuff. We
122   // ignore this stuff.
123   const FX_FILESIZE m_HeaderOffset;
124   const FX_FILESIZE m_FileLen;
125   FX_FILESIZE m_Pos = 0;
126   WeakPtr<ByteStringPool> m_pPool;
127   DataVector<uint8_t> m_pFileBuf;
128   FX_FILESIZE m_BufOffset = 0;
129   uint32_t m_WordSize = 0;
130   uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
131   std::array<uint8_t, 257> m_WordBuffer = {};
132 
133   // The syntax parser records traversed trailer end byte offsets here.
134   UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
135 };
136 
137 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
138