1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../../include/fpdfapi/fpdf_parser.h"
8 extern const FX_LPCSTR _PDF_CharType =
9 "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR"
10 "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR"
11 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
12 "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR"
13 "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
14 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
15 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR"
16 "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW";
// Fallback definition, used only when no earlier header has defined MAX_PATH.
#if !defined(MAX_PATH)
#define MAX_PATH 4096
#endif
CPDF_SimpleParser(FX_LPCBYTE pData,FX_DWORD dwSize)20 CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
21 {
22 m_pData = pData;
23 m_dwSize = dwSize;
24 m_dwCurPos = 0;
25 }
CPDF_SimpleParser(FX_BSTR str)26 CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
27 {
28 m_pData = str;
29 m_dwSize = str.GetLength();
30 m_dwCurPos = 0;
31 }
ParseWord(FX_LPCBYTE & pStart,FX_DWORD & dwSize,int & type)32 void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
33 {
34 pStart = NULL;
35 dwSize = 0;
36 type = PDFWORD_EOF;
37 FX_BYTE ch;
38 char chartype;
39 while (1) {
40 if (m_dwSize <= m_dwCurPos) {
41 return;
42 }
43 ch = m_pData[m_dwCurPos++];
44 chartype = _PDF_CharType[ch];
45 while (chartype == 'W') {
46 if (m_dwSize <= m_dwCurPos) {
47 return;
48 }
49 ch = m_pData[m_dwCurPos++];
50 chartype = _PDF_CharType[ch];
51 }
52 if (ch != '%') {
53 break;
54 }
55 while (1) {
56 if (m_dwSize <= m_dwCurPos) {
57 return;
58 }
59 ch = m_pData[m_dwCurPos++];
60 if (ch == '\r' || ch == '\n') {
61 break;
62 }
63 }
64 chartype = _PDF_CharType[ch];
65 }
66 FX_DWORD start_pos = m_dwCurPos - 1;
67 pStart = m_pData + start_pos;
68 if (chartype == 'D') {
69 if (ch == '/') {
70 while (1) {
71 if (m_dwSize <= m_dwCurPos) {
72 return;
73 }
74 ch = m_pData[m_dwCurPos++];
75 chartype = _PDF_CharType[ch];
76 if (chartype != 'R' && chartype != 'N') {
77 m_dwCurPos --;
78 dwSize = m_dwCurPos - start_pos;
79 type = PDFWORD_NAME;
80 return;
81 }
82 }
83 } else {
84 type = PDFWORD_DELIMITER;
85 dwSize = 1;
86 if (ch == '<') {
87 if (m_dwSize <= m_dwCurPos) {
88 return;
89 }
90 ch = m_pData[m_dwCurPos++];
91 if (ch == '<') {
92 dwSize = 2;
93 } else {
94 m_dwCurPos --;
95 }
96 } else if (ch == '>') {
97 if (m_dwSize <= m_dwCurPos) {
98 return;
99 }
100 ch = m_pData[m_dwCurPos++];
101 if (ch == '>') {
102 dwSize = 2;
103 } else {
104 m_dwCurPos --;
105 }
106 }
107 }
108 return;
109 }
110 type = PDFWORD_NUMBER;
111 dwSize = 1;
112 while (1) {
113 if (chartype != 'N') {
114 type = PDFWORD_TEXT;
115 }
116 if (m_dwSize <= m_dwCurPos) {
117 return;
118 }
119 ch = m_pData[m_dwCurPos++];
120 chartype = _PDF_CharType[ch];
121 if (chartype == 'D' || chartype == 'W') {
122 m_dwCurPos --;
123 break;
124 }
125 dwSize ++;
126 }
127 }
GetWord()128 CFX_ByteStringC CPDF_SimpleParser::GetWord()
129 {
130 FX_LPCBYTE pStart;
131 FX_DWORD dwSize;
132 int type;
133 ParseWord(pStart, dwSize, type);
134 if (dwSize == 1 && pStart[0] == '<') {
135 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
136 m_dwCurPos ++;
137 }
138 if (m_dwCurPos < m_dwSize) {
139 m_dwCurPos ++;
140 }
141 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
142 } else if (dwSize == 1 && pStart[0] == '(') {
143 int level = 1;
144 while (m_dwCurPos < m_dwSize) {
145 if (m_pData[m_dwCurPos] == ')') {
146 level --;
147 if (level == 0) {
148 break;
149 }
150 }
151 if (m_pData[m_dwCurPos] == '\\') {
152 if (m_dwSize <= m_dwCurPos) {
153 break;
154 }
155 m_dwCurPos ++;
156 } else if (m_pData[m_dwCurPos] == '(') {
157 level ++;
158 }
159 if (m_dwSize <= m_dwCurPos) {
160 break;
161 }
162 m_dwCurPos ++;
163 }
164 if (m_dwCurPos < m_dwSize) {
165 m_dwCurPos ++;
166 }
167 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
168 }
169 return CFX_ByteStringC(pStart, dwSize);
170 }
SearchToken(FX_BSTR token)171 FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
172 {
173 int token_len = token.GetLength();
174 while (m_dwCurPos < m_dwSize - token_len) {
175 if (FXSYS_memcmp32(m_pData + m_dwCurPos, token, token_len) == 0) {
176 break;
177 }
178 m_dwCurPos ++;
179 }
180 if (m_dwCurPos == m_dwSize - token_len) {
181 return FALSE;
182 }
183 m_dwCurPos += token_len;
184 return TRUE;
185 }
SkipWord(FX_BSTR token)186 FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
187 {
188 while (1) {
189 CFX_ByteStringC word = GetWord();
190 if (word.IsEmpty()) {
191 return FALSE;
192 }
193 if (word == token) {
194 return TRUE;
195 }
196 }
197 return FALSE;
198 }
FindTagPair(FX_BSTR start_token,FX_BSTR end_token,FX_DWORD & start_pos,FX_DWORD & end_pos)199 FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
200 FX_DWORD& start_pos, FX_DWORD& end_pos)
201 {
202 if (!start_token.IsEmpty()) {
203 if (!SkipWord(start_token)) {
204 return FALSE;
205 }
206 start_pos = m_dwCurPos;
207 }
208 while (1) {
209 end_pos = m_dwCurPos;
210 CFX_ByteStringC word = GetWord();
211 if (word.IsEmpty()) {
212 return FALSE;
213 }
214 if (word == end_token) {
215 return TRUE;
216 }
217 }
218 return FALSE;
219 }
FindTagParam(FX_BSTR token,int nParams)220 FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
221 {
222 nParams ++;
223 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
224 int buf_index = 0;
225 int buf_count = 0;
226 while (1) {
227 pBuf[buf_index++] = m_dwCurPos;
228 if (buf_index == nParams) {
229 buf_index = 0;
230 }
231 buf_count ++;
232 if (buf_count > nParams) {
233 buf_count = nParams;
234 }
235 CFX_ByteStringC word = GetWord();
236 if (word.IsEmpty()) {
237 FX_Free(pBuf);
238 return FALSE;
239 }
240 if (word == token) {
241 if (buf_count < nParams) {
242 continue;
243 }
244 m_dwCurPos = pBuf[buf_index];
245 FX_Free(pBuf);
246 return TRUE;
247 }
248 }
249 return FALSE;
250 }
// Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
// Any other character yields 0.
static int _hex2dec(char ch)
{
    int value = 0;
    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if ('A' <= ch && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
}
PDF_NameDecode(FX_BSTR bstr)264 CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
265 {
266 int size = bstr.GetLength();
267 FX_LPCSTR pSrc = bstr.GetCStr();
268 if (FXSYS_memchr(pSrc, '#', size) == NULL) {
269 return bstr;
270 }
271 CFX_ByteString result;
272 FX_LPSTR pDestStart = result.GetBuffer(size);
273 FX_LPSTR pDest = pDestStart;
274 for (int i = 0; i < size; i ++) {
275 if (pSrc[i] == '#' && i < size - 2) {
276 *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]);
277 i += 2;
278 } else {
279 *pDest ++ = pSrc[i];
280 }
281 }
282 result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
283 return result;
284 }
// Overload for CFX_ByteString input. A name without any '#' is returned
// directly; otherwise decoding is delegated to the FX_BSTR overload.
CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
{
    if (FXSYS_memchr((FX_LPCSTR)orig, '#', orig.GetLength()) != NULL) {
        return PDF_NameDecode(CFX_ByteStringC(orig));
    }
    return orig;
}
PDF_NameEncode(const CFX_ByteString & orig)292 CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig)
293 {
294 FX_LPBYTE src_buf = (FX_LPBYTE)(FX_LPCSTR)orig;
295 int src_len = orig.GetLength();
296 int dest_len = 0;
297 int i;
298 for (i = 0; i < src_len; i ++) {
299 FX_BYTE ch = src_buf[i];
300 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
301 _PDF_CharType[ch] == 'D') {
302 dest_len += 3;
303 } else {
304 dest_len ++;
305 }
306 }
307 if (dest_len == src_len) {
308 return orig;
309 }
310 CFX_ByteString res;
311 FX_LPSTR dest_buf = res.GetBuffer(dest_len);
312 dest_len = 0;
313 for (i = 0; i < src_len; i ++) {
314 FX_BYTE ch = src_buf[i];
315 if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' ||
316 _PDF_CharType[ch] == 'D') {
317 dest_buf[dest_len++] = '#';
318 dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
319 dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];
320 } else {
321 dest_buf[dest_len++] = ch;
322 }
323 }
324 dest_buf[dest_len] = 0;
325 res.ReleaseBuffer();
326 return res;
327 }
operator <<(CFX_ByteTextBuf & buf,const CPDF_Object * pObj)328 CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
329 {
330 if (pObj == NULL) {
331 buf << FX_BSTRC(" null");
332 return buf;
333 }
334 switch (pObj->GetType()) {
335 case PDFOBJ_NULL:
336 buf << FX_BSTRC(" null");
337 break;
338 case PDFOBJ_BOOLEAN:
339 case PDFOBJ_NUMBER:
340 buf << " " << pObj->GetString();
341 break;
342 case PDFOBJ_STRING: {
343 CFX_ByteString str = pObj->GetString();
344 FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex();
345 buf << PDF_EncodeString(str, bHex);
346 break;
347 }
348 case PDFOBJ_NAME: {
349 CFX_ByteString str = pObj->GetString();
350 buf << FX_BSTRC("/") << PDF_NameEncode(str);
351 break;
352 }
353 case PDFOBJ_REFERENCE: {
354 CPDF_Reference* p = (CPDF_Reference*)pObj;
355 buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R ");
356 break;
357 }
358 case PDFOBJ_ARRAY: {
359 CPDF_Array* p = (CPDF_Array*)pObj;
360 buf << FX_BSTRC("[");
361 for (FX_DWORD i = 0; i < p->GetCount(); i ++) {
362 CPDF_Object* pElement = p->GetElement(i);
363 if (pElement->GetObjNum()) {
364 buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R");
365 } else {
366 buf << pElement;
367 }
368 }
369 buf << FX_BSTRC("]");
370 break;
371 }
372 case PDFOBJ_DICTIONARY: {
373 CPDF_Dictionary* p = (CPDF_Dictionary*)pObj;
374 buf << FX_BSTRC("<<");
375 FX_POSITION pos = p->GetStartPos();
376 while (pos) {
377 CFX_ByteString key;
378 CPDF_Object* pValue = p->GetNextElement(pos, key);
379 buf << FX_BSTRC("/") << PDF_NameEncode(key);
380 if (pValue->GetObjNum()) {
381 buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R ");
382 } else {
383 buf << pValue;
384 }
385 }
386 buf << FX_BSTRC(">>");
387 break;
388 }
389 case PDFOBJ_STREAM: {
390 CPDF_Stream* p = (CPDF_Stream*)pObj;
391 buf << p->GetDict() << FX_BSTRC("stream\r\n");
392 CPDF_StreamAcc acc;
393 acc.LoadAllData(p, TRUE);
394 buf.AppendBlock(acc.GetData(), acc.GetSize());
395 buf << FX_BSTRC("\r\nendstream");
396 break;
397 }
398 default:
399 ASSERT(FALSE);
400 break;
401 }
402 return buf;
403 }
PDF_ClipFloat(FX_FLOAT f)404 FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
405 {
406 if (f < 0) {
407 return 0;
408 }
409 if (f > 1.0f) {
410 return 1.0f;
411 }
412 return f;
413 }
SearchNumberNode(CPDF_Dictionary * pNode,int num)414 static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num)
415 {
416 CPDF_Array* pLimits = pNode->GetArray("Limits");
417 if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
418 return NULL;
419 }
420 CPDF_Array* pNumbers = pNode->GetArray("Nums");
421 if (pNumbers) {
422 FX_DWORD dwCount = pNumbers->GetCount() / 2;
423 for (FX_DWORD i = 0; i < dwCount; i ++) {
424 int index = pNumbers->GetInteger(i * 2);
425 if (num == index) {
426 return pNumbers->GetElementValue(i * 2 + 1);
427 }
428 if (index > num) {
429 break;
430 }
431 }
432 return NULL;
433 }
434 CPDF_Array* pKids = pNode->GetArray("Kids");
435 if (pKids == NULL) {
436 return NULL;
437 }
438 for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
439 CPDF_Dictionary* pKid = pKids->GetDict(i);
440 if (pKid == NULL) {
441 continue;
442 }
443 CPDF_Object* pFound = SearchNumberNode(pKid, num);
444 if (pFound) {
445 return pFound;
446 }
447 }
448 return NULL;
449 }
LookupValue(int num)450 CPDF_Object* CPDF_NumberTree::LookupValue(int num)
451 {
452 return SearchNumberNode(m_pRoot, num);
453 }
454