1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
#include "core/fpdfapi/parser/cpdf_simple_parser.h"

#include <vector>

#include "core/fpdfapi/parser/fpdf_parser_utility.h"
10
CPDF_SimpleParser(const uint8_t * pData,uint32_t dwSize)11 CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, uint32_t dwSize)
12 : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {}
13
CPDF_SimpleParser(const CFX_ByteStringC & str)14 CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str)
15 : m_pData(str.raw_str()), m_dwSize(str.GetLength()), m_dwCurPos(0) {}
16
ParseWord(const uint8_t * & pStart,uint32_t & dwSize)17 void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, uint32_t& dwSize) {
18 pStart = nullptr;
19 dwSize = 0;
20 uint8_t ch;
21 while (1) {
22 if (m_dwSize <= m_dwCurPos)
23 return;
24 ch = m_pData[m_dwCurPos++];
25 while (PDFCharIsWhitespace(ch)) {
26 if (m_dwSize <= m_dwCurPos)
27 return;
28 ch = m_pData[m_dwCurPos++];
29 }
30
31 if (ch != '%')
32 break;
33
34 while (1) {
35 if (m_dwSize <= m_dwCurPos)
36 return;
37 ch = m_pData[m_dwCurPos++];
38 if (PDFCharIsLineEnding(ch))
39 break;
40 }
41 }
42
43 uint32_t start_pos = m_dwCurPos - 1;
44 pStart = m_pData + start_pos;
45 if (PDFCharIsDelimiter(ch)) {
46 if (ch == '/') {
47 while (1) {
48 if (m_dwSize <= m_dwCurPos)
49 return;
50 ch = m_pData[m_dwCurPos++];
51 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
52 m_dwCurPos--;
53 dwSize = m_dwCurPos - start_pos;
54 return;
55 }
56 }
57 } else {
58 dwSize = 1;
59 if (ch == '<') {
60 if (m_dwSize <= m_dwCurPos)
61 return;
62 ch = m_pData[m_dwCurPos++];
63 if (ch == '<')
64 dwSize = 2;
65 else
66 m_dwCurPos--;
67 } else if (ch == '>') {
68 if (m_dwSize <= m_dwCurPos)
69 return;
70 ch = m_pData[m_dwCurPos++];
71 if (ch == '>')
72 dwSize = 2;
73 else
74 m_dwCurPos--;
75 }
76 }
77 return;
78 }
79
80 dwSize = 1;
81 while (1) {
82 if (m_dwSize <= m_dwCurPos)
83 return;
84 ch = m_pData[m_dwCurPos++];
85
86 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
87 m_dwCurPos--;
88 break;
89 }
90 dwSize++;
91 }
92 }
93
GetWord()94 CFX_ByteStringC CPDF_SimpleParser::GetWord() {
95 const uint8_t* pStart;
96 uint32_t dwSize;
97 ParseWord(pStart, dwSize);
98 if (dwSize == 1 && pStart[0] == '<') {
99 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
100 m_dwCurPos++;
101 }
102 if (m_dwCurPos < m_dwSize) {
103 m_dwCurPos++;
104 }
105 return CFX_ByteStringC(pStart,
106 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
107 }
108 if (dwSize == 1 && pStart[0] == '(') {
109 int level = 1;
110 while (m_dwCurPos < m_dwSize) {
111 if (m_pData[m_dwCurPos] == ')') {
112 level--;
113 if (level == 0) {
114 break;
115 }
116 }
117 if (m_pData[m_dwCurPos] == '\\') {
118 if (m_dwSize <= m_dwCurPos) {
119 break;
120 }
121 m_dwCurPos++;
122 } else if (m_pData[m_dwCurPos] == '(') {
123 level++;
124 }
125 if (m_dwSize <= m_dwCurPos) {
126 break;
127 }
128 m_dwCurPos++;
129 }
130 if (m_dwCurPos < m_dwSize) {
131 m_dwCurPos++;
132 }
133 return CFX_ByteStringC(pStart,
134 (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
135 }
136 return CFX_ByteStringC(pStart, dwSize);
137 }
138
FindTagParamFromStart(const CFX_ByteStringC & token,int nParams)139 bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
140 int nParams) {
141 nParams++;
142 uint32_t* pBuf = FX_Alloc(uint32_t, nParams);
143 int buf_index = 0;
144 int buf_count = 0;
145 m_dwCurPos = 0;
146 while (1) {
147 pBuf[buf_index++] = m_dwCurPos;
148 if (buf_index == nParams) {
149 buf_index = 0;
150 }
151 buf_count++;
152 if (buf_count > nParams) {
153 buf_count = nParams;
154 }
155 CFX_ByteStringC word = GetWord();
156 if (word.IsEmpty()) {
157 FX_Free(pBuf);
158 return false;
159 }
160 if (word == token) {
161 if (buf_count < nParams) {
162 continue;
163 }
164 m_dwCurPos = pBuf[buf_index];
165 FX_Free(pBuf);
166 return true;
167 }
168 }
169 return false;
170 }
171