1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_simple_parser.h" 8 9 #include <stdint.h> 10 11 #include <optional> 12 13 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 14 #include "core/fxcrt/check_op.h" 15 CPDF_SimpleParser(pdfium::span<const uint8_t> input)16CPDF_SimpleParser::CPDF_SimpleParser(pdfium::span<const uint8_t> input) 17 : data_(input) {} 18 19 CPDF_SimpleParser::~CPDF_SimpleParser() = default; 20 GetWord()21ByteStringView CPDF_SimpleParser::GetWord() { 22 std::optional<uint8_t> start_char = SkipSpacesAndComments(); 23 if (!start_char.has_value()) { 24 return ByteStringView(); 25 } 26 27 CHECK_GT(cur_position_, 0); 28 uint32_t start_position = cur_position_ - 1; 29 CHECK_LT(start_position, data_.size()); 30 31 if (!PDFCharIsDelimiter(start_char.value())) { 32 return HandleNonDelimiter(); 33 } 34 35 switch (start_char.value()) { 36 case '/': 37 return HandleName(); 38 case '<': 39 return HandleBeginAngleBracket(); 40 case '>': 41 return HandleEndAngleBracket(); 42 case '(': 43 return HandleParentheses(); 44 default: 45 return GetDataToCurrentPosition(start_position); 46 } 47 } 48 GetDataToCurrentPosition(uint32_t start_position) const49ByteStringView CPDF_SimpleParser::GetDataToCurrentPosition( 50 uint32_t start_position) const { 51 return ByteStringView( 52 data_.subspan(start_position, cur_position_ - start_position)); 53 } 54 SkipSpacesAndComments()55std::optional<uint8_t> CPDF_SimpleParser::SkipSpacesAndComments() { 56 while (true) { 57 if (cur_position_ >= data_.size()) { 58 return std::nullopt; 59 } 60 61 // Skip whitespaces. 62 uint8_t cur_char = data_[cur_position_++]; 63 while (PDFCharIsWhitespace(cur_char)) { 64 if (cur_position_ >= data_.size()) { 65 return std::nullopt; 66 } 67 cur_char = data_[cur_position_++]; 68 } 69 70 if (cur_char != '%') { 71 return cur_char; 72 } 73 74 // Skip comments. 75 while (true) { 76 if (cur_position_ >= data_.size()) { 77 return std::nullopt; 78 } 79 80 cur_char = data_[cur_position_++]; 81 if (PDFCharIsLineEnding(cur_char)) { 82 break; 83 } 84 } 85 } 86 } 87 HandleName()88ByteStringView CPDF_SimpleParser::HandleName() { 89 uint32_t start_position = cur_position_ - 1; 90 while (cur_position_ < data_.size()) { 91 uint8_t cur_char = data_[cur_position_]; 92 // Stop parsing after encountering a whitespace or delimiter. 93 if (PDFCharIsWhitespace(cur_char) || PDFCharIsDelimiter(cur_char)) { 94 return GetDataToCurrentPosition(start_position); 95 } 96 ++cur_position_; 97 } 98 return ByteStringView(); 99 } 100 HandleBeginAngleBracket()101ByteStringView CPDF_SimpleParser::HandleBeginAngleBracket() { 102 uint32_t start_position = cur_position_ - 1; 103 if (cur_position_ >= data_.size()) { 104 return GetDataToCurrentPosition(start_position); 105 } 106 107 uint8_t cur_char = data_[cur_position_++]; 108 // Stop parsing if encountering "<<". 109 if (cur_char == '<') { 110 return GetDataToCurrentPosition(start_position); 111 } 112 113 // Continue parsing until end of `data_` or closing bracket. 114 while (cur_position_ < data_.size() && cur_char != '>') { 115 cur_char = data_[cur_position_++]; 116 } 117 return GetDataToCurrentPosition(start_position); 118 } 119 HandleEndAngleBracket()120ByteStringView CPDF_SimpleParser::HandleEndAngleBracket() { 121 uint32_t start_position = cur_position_ - 1; 122 if (cur_position_ < data_.size() && data_[cur_position_] == '>') { 123 ++cur_position_; 124 } 125 return GetDataToCurrentPosition(start_position); 126 } 127 HandleParentheses()128ByteStringView CPDF_SimpleParser::HandleParentheses() { 129 uint32_t start_position = cur_position_ - 1; 130 int level = 1; 131 while (cur_position_ < data_.size() && level > 0) { 132 uint8_t cur_char = data_[cur_position_++]; 133 if (cur_char == '(') { 134 ++level; 135 } else if (cur_char == ')') { 136 --level; 137 } 138 } 139 return GetDataToCurrentPosition(start_position); 140 } 141 HandleNonDelimiter()142ByteStringView CPDF_SimpleParser::HandleNonDelimiter() { 143 uint32_t start_position = cur_position_ - 1; 144 while (cur_position_ < data_.size()) { 145 uint8_t cur_char = data_[cur_position_]; 146 if (PDFCharIsDelimiter(cur_char) || PDFCharIsWhitespace(cur_char)) { 147 break; 148 } 149 ++cur_position_; 150 } 151 return GetDataToCurrentPosition(start_position); 152 } 153