// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9 
10 #include <stdint.h>
11 
12 #include <memory>
13 #include <vector>
14 
15 #include "core/fpdfapi/parser/cpdf_stream.h"
16 #include "core/fxcrt/data_vector.h"
17 #include "core/fxcrt/fx_types.h"
18 #include "core/fxcrt/retain_ptr.h"
19 #include "core/fxcrt/string_pool_template.h"
20 #include "core/fxcrt/unowned_ptr.h"
21 #include "core/fxcrt/weak_ptr.h"
22 #include "third_party/base/span.h"
23 
24 class CPDF_Dictionary;
25 class CPDF_IndirectObjectHolder;
26 class CPDF_Object;
27 class CPDF_ReadValidator;
28 class CPDF_Stream;
29 class IFX_SeekableReadStream;
30 
31 class CPDF_SyntaxParser {
32  public:
33   enum class ParseType : bool { kStrict, kLoose };
34 
35   struct WordResult {
36     ByteString word;
37     bool is_number;
38   };
39 
40   static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting(
41       RetainPtr<IFX_SeekableReadStream> pFileAccess,
42       FX_FILESIZE HeaderOffset);
43 
44   explicit CPDF_SyntaxParser(RetainPtr<IFX_SeekableReadStream> pFileAccess);
45   CPDF_SyntaxParser(RetainPtr<CPDF_ReadValidator> pValidator,
46                     FX_FILESIZE HeaderOffset);
47   ~CPDF_SyntaxParser();
48 
SetReadBufferSize(uint32_t read_buffer_size)49   void SetReadBufferSize(uint32_t read_buffer_size) {
50     m_ReadBufferSize = read_buffer_size;
51   }
52 
GetPos()53   FX_FILESIZE GetPos() const { return m_Pos; }
54   void SetPos(FX_FILESIZE pos);
55 
56   RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList);
57   RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList,
58                                            ParseType parse_type);
59 
60   ByteString GetKeyword();
61   void ToNextLine();
62   void ToNextWord();
63   void RecordingToNextWord();
64   bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
65   FX_FILESIZE FindTag(ByteStringView tag);
66   bool ReadBlock(pdfium::span<uint8_t> buffer);
67   bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
68   WordResult GetNextWord();
69   ByteString PeekNextWord();
70 
71   RetainPtr<CPDF_ReadValidator> GetValidator() const;
72   uint32_t GetDirectNum();
73   bool GetNextChar(uint8_t& ch);
74 
75   // The document size may be smaller than the file size.
76   // The syntax parser use position relative to document
77   // offset (|m_HeaderOffset|).
78   // The document size will be FileSize - "Header offset".
79   // All offsets was readed from document, should not be great than document
80   // size. Use it for checks instead of real file size.
81   FX_FILESIZE GetDocumentSize() const;
82 
83   ByteString ReadString();
84   ByteString ReadHexString();
85 
SetTrailerEnds(std::vector<unsigned int> * trailer_ends)86   void SetTrailerEnds(std::vector<unsigned int>* trailer_ends) {
87     m_TrailerEnds = trailer_ends;
88   }
89 
90  private:
91   enum class WordType : bool { kWord, kNumber };
92 
93   friend class CPDF_DataAvail;
94   friend class cpdf_syntax_parser_ReadHexString_Test;
95 
96   static constexpr int kParserMaxRecursionDepth = 64;
97   static int s_CurrentRecursionDepth;
98 
99   bool ReadBlockAt(FX_FILESIZE read_pos);
100   bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
101   WordType GetNextWordInternal();
102   bool IsWholeWord(FX_FILESIZE startpos,
103                    FX_FILESIZE limit,
104                    ByteStringView tag,
105                    bool checkKeyword);
106 
107   unsigned int ReadEOLMarkers(FX_FILESIZE pos);
108   FX_FILESIZE FindWordPos(ByteStringView word);
109   FX_FILESIZE FindStreamEndPos();
110   RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict);
111 
112   bool IsPositionRead(FX_FILESIZE pos) const;
113 
114   RetainPtr<CPDF_Object> GetObjectBodyInternal(
115       CPDF_IndirectObjectHolder* pObjList,
116       ParseType parse_type);
117 
118   RetainPtr<CPDF_ReadValidator> m_pFileAccess;
119   // The syntax parser use position relative to header offset.
120   // The header contains at file start, and can follow after some stuff. We
121   // ignore this stuff.
122   const FX_FILESIZE m_HeaderOffset;
123   const FX_FILESIZE m_FileLen;
124   FX_FILESIZE m_Pos = 0;
125   WeakPtr<ByteStringPool> m_pPool;
126   DataVector<uint8_t> m_pFileBuf;
127   FX_FILESIZE m_BufOffset = 0;
128   uint32_t m_WordSize = 0;
129   uint8_t m_WordBuffer[257] = {};
130   uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
131 
132   // The syntax parser records traversed trailer end byte offsets here.
133   UnownedPtr<std::vector<unsigned int>> m_TrailerEnds;
134 };
135 
136 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
137