• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
8 
9 #include <stdint.h>
10 
11 #include <optional>
12 
13 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
14 #include "core/fxcrt/check_op.h"
15 
CPDF_SimpleParser(pdfium::span<const uint8_t> input)16 CPDF_SimpleParser::CPDF_SimpleParser(pdfium::span<const uint8_t> input)
17     : data_(input) {}
18 
19 CPDF_SimpleParser::~CPDF_SimpleParser() = default;
20 
GetWord()21 ByteStringView CPDF_SimpleParser::GetWord() {
22   std::optional<uint8_t> start_char = SkipSpacesAndComments();
23   if (!start_char.has_value()) {
24     return ByteStringView();
25   }
26 
27   CHECK_GT(cur_position_, 0);
28   uint32_t start_position = cur_position_ - 1;
29   CHECK_LT(start_position, data_.size());
30 
31   if (!PDFCharIsDelimiter(start_char.value())) {
32     return HandleNonDelimiter();
33   }
34 
35   switch (start_char.value()) {
36     case '/':
37       return HandleName();
38     case '<':
39       return HandleBeginAngleBracket();
40     case '>':
41       return HandleEndAngleBracket();
42     case '(':
43       return HandleParentheses();
44     default:
45       return GetDataToCurrentPosition(start_position);
46   }
47 }
48 
GetDataToCurrentPosition(uint32_t start_position) const49 ByteStringView CPDF_SimpleParser::GetDataToCurrentPosition(
50     uint32_t start_position) const {
51   return ByteStringView(
52       data_.subspan(start_position, cur_position_ - start_position));
53 }
54 
SkipSpacesAndComments()55 std::optional<uint8_t> CPDF_SimpleParser::SkipSpacesAndComments() {
56   while (true) {
57     if (cur_position_ >= data_.size()) {
58       return std::nullopt;
59     }
60 
61     // Skip whitespaces.
62     uint8_t cur_char = data_[cur_position_++];
63     while (PDFCharIsWhitespace(cur_char)) {
64       if (cur_position_ >= data_.size()) {
65         return std::nullopt;
66       }
67       cur_char = data_[cur_position_++];
68     }
69 
70     if (cur_char != '%') {
71       return cur_char;
72     }
73 
74     // Skip comments.
75     while (true) {
76       if (cur_position_ >= data_.size()) {
77         return std::nullopt;
78       }
79 
80       cur_char = data_[cur_position_++];
81       if (PDFCharIsLineEnding(cur_char)) {
82         break;
83       }
84     }
85   }
86 }
87 
HandleName()88 ByteStringView CPDF_SimpleParser::HandleName() {
89   uint32_t start_position = cur_position_ - 1;
90   while (cur_position_ < data_.size()) {
91     uint8_t cur_char = data_[cur_position_];
92     // Stop parsing after encountering a whitespace or delimiter.
93     if (PDFCharIsWhitespace(cur_char) || PDFCharIsDelimiter(cur_char)) {
94       return GetDataToCurrentPosition(start_position);
95     }
96     ++cur_position_;
97   }
98   return ByteStringView();
99 }
100 
HandleBeginAngleBracket()101 ByteStringView CPDF_SimpleParser::HandleBeginAngleBracket() {
102   uint32_t start_position = cur_position_ - 1;
103   if (cur_position_ >= data_.size()) {
104     return GetDataToCurrentPosition(start_position);
105   }
106 
107   uint8_t cur_char = data_[cur_position_++];
108   // Stop parsing if encountering "<<".
109   if (cur_char == '<') {
110     return GetDataToCurrentPosition(start_position);
111   }
112 
113   // Continue parsing until end of `data_` or closing bracket.
114   while (cur_position_ < data_.size() && cur_char != '>') {
115     cur_char = data_[cur_position_++];
116   }
117   return GetDataToCurrentPosition(start_position);
118 }
119 
HandleEndAngleBracket()120 ByteStringView CPDF_SimpleParser::HandleEndAngleBracket() {
121   uint32_t start_position = cur_position_ - 1;
122   if (cur_position_ < data_.size() && data_[cur_position_] == '>') {
123     ++cur_position_;
124   }
125   return GetDataToCurrentPosition(start_position);
126 }
127 
HandleParentheses()128 ByteStringView CPDF_SimpleParser::HandleParentheses() {
129   uint32_t start_position = cur_position_ - 1;
130   int level = 1;
131   while (cur_position_ < data_.size() && level > 0) {
132     uint8_t cur_char = data_[cur_position_++];
133     if (cur_char == '(') {
134       ++level;
135     } else if (cur_char == ')') {
136       --level;
137     }
138   }
139   return GetDataToCurrentPosition(start_position);
140 }
141 
HandleNonDelimiter()142 ByteStringView CPDF_SimpleParser::HandleNonDelimiter() {
143   uint32_t start_position = cur_position_ - 1;
144   while (cur_position_ < data_.size()) {
145     uint8_t cur_char = data_[cur_position_];
146     if (PDFCharIsDelimiter(cur_char) || PDFCharIsWhitespace(cur_char)) {
147       break;
148     }
149     ++cur_position_;
150   }
151   return GetDataToCurrentPosition(start_position);
152 }
153