1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8
9 #include <algorithm>
10 #include <sstream>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/cpdf_modulemgr.h"
15 #include "core/fpdfapi/parser/cpdf_array.h"
16 #include "core/fpdfapi/parser/cpdf_boolean.h"
17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_null.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_string.h"
26 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
27 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
28 #include "core/fxcrt/autorestorer.h"
29 #include "core/fxcrt/cfx_binarybuf.h"
30 #include "core/fxcrt/fx_extension.h"
31 #include "third_party/base/numerics/safe_math.h"
32 #include "third_party/base/ptr_util.h"
33
namespace {

// States of the literal-string scanner used by CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  // Not inside an escape sequence.
  Normal,
  // A backslash has just been consumed.
  Backslash,
  // The first octal digit of a backslash escape has been consumed.
  Octal,
  // The second octal digit of a backslash escape has been consumed.
  FinishOctal,
  // A backslash followed by '\r' has been consumed.
  CarriageReturn
};

}  // namespace
39
40 // static
41 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
42
CPDF_SyntaxParser()43 CPDF_SyntaxParser::CPDF_SyntaxParser()
44 : CPDF_SyntaxParser(WeakPtr<ByteStringPool>()) {}
45
CPDF_SyntaxParser(const WeakPtr<ByteStringPool> & pPool)46 CPDF_SyntaxParser::CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool)
47 : m_pFileAccess(nullptr), m_pPool(pPool) {}
48
~CPDF_SyntaxParser()49 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
50 }
51
GetCharAt(FX_FILESIZE pos,uint8_t & ch)52 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
53 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
54 m_Pos = pos;
55 return GetNextChar(ch);
56 }
57
ReadBlockAt(FX_FILESIZE read_pos)58 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
59 if (read_pos >= m_FileLen)
60 return false;
61 size_t read_size = CPDF_ModuleMgr::kFileBufSize;
62 FX_SAFE_FILESIZE safe_end = read_pos;
63 safe_end += read_size;
64 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
65 read_size = m_FileLen - read_pos;
66
67 m_pFileBuf.resize(read_size);
68 if (!m_pFileAccess->ReadBlock(m_pFileBuf.data(), read_pos, read_size)) {
69 m_pFileBuf.clear();
70 return false;
71 }
72
73 m_BufOffset = read_pos;
74 return true;
75 }
76
GetNextChar(uint8_t & ch)77 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
78 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
79 if (pos >= m_FileLen)
80 return false;
81
82 if (!IsPositionRead(pos) && !ReadBlockAt(pos))
83 return false;
84
85 ch = m_pFileBuf[pos - m_BufOffset];
86 m_Pos++;
87 return true;
88 }
89
GetCharAtBackward(FX_FILESIZE pos,uint8_t * ch)90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
91 pos += m_HeaderOffset;
92 if (pos >= m_FileLen)
93 return false;
94
95 if (!IsPositionRead(pos)) {
96 FX_FILESIZE block_start = 0;
97 if (pos >= CPDF_ModuleMgr::kFileBufSize)
98 block_start = pos - CPDF_ModuleMgr::kFileBufSize + 1;
99 if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
100 return false;
101 }
102 *ch = m_pFileBuf[pos - m_BufOffset];
103 return true;
104 }
105
ReadBlock(uint8_t * pBuf,uint32_t size)106 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
107 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
108 return false;
109 m_Pos += size;
110 return true;
111 }
112
GetNextWordInternal(bool * bIsNumber)113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
114 m_WordSize = 0;
115 if (bIsNumber)
116 *bIsNumber = true;
117
118 ToNextWord();
119 uint8_t ch;
120 if (!GetNextChar(ch))
121 return;
122
123 if (PDFCharIsDelimiter(ch)) {
124 if (bIsNumber)
125 *bIsNumber = false;
126
127 m_WordBuffer[m_WordSize++] = ch;
128 if (ch == '/') {
129 while (1) {
130 if (!GetNextChar(ch))
131 return;
132
133 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
134 m_Pos--;
135 return;
136 }
137
138 if (m_WordSize < sizeof(m_WordBuffer) - 1)
139 m_WordBuffer[m_WordSize++] = ch;
140 }
141 } else if (ch == '<') {
142 if (!GetNextChar(ch))
143 return;
144
145 if (ch == '<')
146 m_WordBuffer[m_WordSize++] = ch;
147 else
148 m_Pos--;
149 } else if (ch == '>') {
150 if (!GetNextChar(ch))
151 return;
152
153 if (ch == '>')
154 m_WordBuffer[m_WordSize++] = ch;
155 else
156 m_Pos--;
157 }
158 return;
159 }
160
161 while (1) {
162 if (m_WordSize < sizeof(m_WordBuffer) - 1)
163 m_WordBuffer[m_WordSize++] = ch;
164
165 if (!PDFCharIsNumeric(ch)) {
166 if (bIsNumber)
167 *bIsNumber = false;
168 }
169
170 if (!GetNextChar(ch))
171 return;
172
173 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
174 m_Pos--;
175 break;
176 }
177 }
178 }
179
ReadString()180 ByteString CPDF_SyntaxParser::ReadString() {
181 uint8_t ch;
182 if (!GetNextChar(ch))
183 return ByteString();
184
185 std::ostringstream buf;
186 int32_t parlevel = 0;
187 ReadStatus status = ReadStatus::Normal;
188 int32_t iEscCode = 0;
189 while (1) {
190 switch (status) {
191 case ReadStatus::Normal:
192 if (ch == ')') {
193 if (parlevel == 0)
194 return ByteString(buf);
195 parlevel--;
196 } else if (ch == '(') {
197 parlevel++;
198 }
199 if (ch == '\\')
200 status = ReadStatus::Backslash;
201 else
202 buf << static_cast<char>(ch);
203 break;
204 case ReadStatus::Backslash:
205 if (ch >= '0' && ch <= '7') {
206 iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
207 status = ReadStatus::Octal;
208 break;
209 }
210
211 if (ch == '\r') {
212 status = ReadStatus::CarriageReturn;
213 break;
214 }
215 if (ch == 'n') {
216 buf << '\n';
217 } else if (ch == 'r') {
218 buf << '\r';
219 } else if (ch == 't') {
220 buf << '\t';
221 } else if (ch == 'b') {
222 buf << '\b';
223 } else if (ch == 'f') {
224 buf << '\f';
225 } else if (ch != '\n') {
226 buf << static_cast<char>(ch);
227 }
228 status = ReadStatus::Normal;
229 break;
230 case ReadStatus::Octal:
231 if (ch >= '0' && ch <= '7') {
232 iEscCode =
233 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
234 status = ReadStatus::FinishOctal;
235 } else {
236 buf << static_cast<char>(iEscCode);
237 status = ReadStatus::Normal;
238 continue;
239 }
240 break;
241 case ReadStatus::FinishOctal:
242 status = ReadStatus::Normal;
243 if (ch >= '0' && ch <= '7') {
244 iEscCode =
245 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
246 buf << static_cast<char>(iEscCode);
247 } else {
248 buf << static_cast<char>(iEscCode);
249 continue;
250 }
251 break;
252 case ReadStatus::CarriageReturn:
253 status = ReadStatus::Normal;
254 if (ch != '\n')
255 continue;
256 break;
257 }
258
259 if (!GetNextChar(ch))
260 break;
261 }
262
263 GetNextChar(ch);
264 return ByteString(buf);
265 }
266
ReadHexString()267 ByteString CPDF_SyntaxParser::ReadHexString() {
268 uint8_t ch;
269 if (!GetNextChar(ch))
270 return ByteString();
271
272 std::ostringstream buf;
273 bool bFirst = true;
274 uint8_t code = 0;
275 while (1) {
276 if (ch == '>')
277 break;
278
279 if (std::isxdigit(ch)) {
280 int val = FXSYS_HexCharToInt(ch);
281 if (bFirst) {
282 code = val * 16;
283 } else {
284 code += val;
285 buf << static_cast<char>(code);
286 }
287 bFirst = !bFirst;
288 }
289
290 if (!GetNextChar(ch))
291 break;
292 }
293 if (!bFirst)
294 buf << static_cast<char>(code);
295
296 return ByteString(buf);
297 }
298
ToNextLine()299 void CPDF_SyntaxParser::ToNextLine() {
300 uint8_t ch;
301 while (GetNextChar(ch)) {
302 if (ch == '\n')
303 break;
304
305 if (ch == '\r') {
306 GetNextChar(ch);
307 if (ch != '\n')
308 --m_Pos;
309 break;
310 }
311 }
312 }
313
ToNextWord()314 void CPDF_SyntaxParser::ToNextWord() {
315 uint8_t ch;
316 if (!GetNextChar(ch))
317 return;
318
319 while (1) {
320 while (PDFCharIsWhitespace(ch)) {
321 if (!GetNextChar(ch))
322 return;
323 }
324
325 if (ch != '%')
326 break;
327
328 while (1) {
329 if (!GetNextChar(ch))
330 return;
331 if (PDFCharIsLineEnding(ch))
332 break;
333 }
334 }
335 m_Pos--;
336 }
337
GetNextWord(bool * bIsNumber)338 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
339 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
340 GetNextWordInternal(bIsNumber);
341 ByteString ret;
342 if (!GetValidator()->has_read_problems())
343 ret = ByteString(m_WordBuffer, m_WordSize);
344 return ret;
345 }
346
PeekNextWord(bool * bIsNumber)347 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) {
348 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
349 return GetNextWord(bIsNumber);
350 }
351
GetKeyword()352 ByteString CPDF_SyntaxParser::GetKeyword() {
353 return GetNextWord(nullptr);
354 }
355
GetObjectBody(CPDF_IndirectObjectHolder * pObjList)356 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
357 CPDF_IndirectObjectHolder* pObjList) {
358 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
359 auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose);
360 if (GetValidator()->has_read_problems())
361 return nullptr;
362 return result;
363 }
364
GetObjectBodyInternal(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)365 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
366 CPDF_IndirectObjectHolder* pObjList,
367 ParseType parse_type) {
368 AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
369 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
370 return nullptr;
371
372 FX_FILESIZE SavedObjPos = m_Pos;
373 bool bIsNumber;
374 ByteString word = GetNextWord(&bIsNumber);
375 if (word.GetLength() == 0)
376 return nullptr;
377
378 if (bIsNumber) {
379 FX_FILESIZE SavedPos = m_Pos;
380 ByteString nextword = GetNextWord(&bIsNumber);
381 if (bIsNumber) {
382 ByteString nextword2 = GetNextWord(nullptr);
383 if (nextword2 == "R") {
384 uint32_t refnum = FXSYS_atoui(word.c_str());
385 if (refnum == CPDF_Object::kInvalidObjNum)
386 return nullptr;
387 return pdfium::MakeUnique<CPDF_Reference>(pObjList, refnum);
388 }
389 }
390 m_Pos = SavedPos;
391 return pdfium::MakeUnique<CPDF_Number>(word.AsStringView());
392 }
393
394 if (word == "true" || word == "false")
395 return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
396
397 if (word == "null")
398 return pdfium::MakeUnique<CPDF_Null>();
399
400 if (word == "(") {
401 ByteString str = ReadString();
402 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
403 }
404 if (word == "<") {
405 ByteString str = ReadHexString();
406 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
407 }
408 if (word == "[") {
409 auto pArray = pdfium::MakeUnique<CPDF_Array>();
410 while (std::unique_ptr<CPDF_Object> pObj =
411 GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
412 pArray->Add(std::move(pObj));
413 }
414 return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
415 ? std::move(pArray)
416 : nullptr;
417 }
418 if (word[0] == '/') {
419 return pdfium::MakeUnique<CPDF_Name>(
420 m_pPool,
421 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
422 }
423 if (word == "<<") {
424 std::unique_ptr<CPDF_Dictionary> pDict =
425 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
426 while (1) {
427 ByteString key = GetNextWord(nullptr);
428 if (key.IsEmpty())
429 return nullptr;
430
431 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
432 if (key == ">>")
433 break;
434
435 if (key == "endobj") {
436 m_Pos = SavedPos;
437 break;
438 }
439 if (key[0] != '/')
440 continue;
441
442 key = PDF_NameDecode(key);
443
444 if (key.IsEmpty() && parse_type == ParseType::kLoose)
445 continue;
446
447 std::unique_ptr<CPDF_Object> pObj =
448 GetObjectBodyInternal(pObjList, ParseType::kLoose);
449 if (!pObj) {
450 if (parse_type == ParseType::kLoose)
451 continue;
452
453 ToNextLine();
454 return nullptr;
455 }
456
457 if (!key.IsEmpty()) {
458 ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
459 pDict->SetFor(keyNoSlash, std::move(pObj));
460 }
461 }
462
463 FX_FILESIZE SavedPos = m_Pos;
464 ByteString nextword = GetNextWord(nullptr);
465 if (nextword != "stream") {
466 m_Pos = SavedPos;
467 return std::move(pDict);
468 }
469 return ReadStream(std::move(pDict));
470 }
471 if (word == ">>")
472 m_Pos = SavedObjPos;
473
474 return nullptr;
475 }
476
GetIndirectObject(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)477 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
478 CPDF_IndirectObjectHolder* pObjList,
479 ParseType parse_type) {
480 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
481 const FX_FILESIZE saved_pos = GetPos();
482 bool is_number = false;
483 ByteString word = GetNextWord(&is_number);
484 if (!is_number || word.IsEmpty()) {
485 SetPos(saved_pos);
486 return nullptr;
487 }
488 const uint32_t parser_objnum = FXSYS_atoui(word.c_str());
489
490 word = GetNextWord(&is_number);
491 if (!is_number || word.IsEmpty()) {
492 SetPos(saved_pos);
493 return nullptr;
494 }
495 const uint32_t parser_gennum = FXSYS_atoui(word.c_str());
496
497 if (GetKeyword() != "obj") {
498 SetPos(saved_pos);
499 return nullptr;
500 }
501
502 std::unique_ptr<CPDF_Object> pObj =
503 GetObjectBodyInternal(pObjList, parse_type);
504 if (pObj) {
505 pObj->SetObjNum(parser_objnum);
506 pObj->SetGenNum(parser_gennum);
507 }
508
509 return GetValidator()->has_read_problems() ? nullptr : std::move(pObj);
510 }
511
ReadEOLMarkers(FX_FILESIZE pos)512 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
513 unsigned char byte1 = 0;
514 unsigned char byte2 = 0;
515
516 GetCharAt(pos, byte1);
517 GetCharAt(pos + 1, byte2);
518
519 if (byte1 == '\r' && byte2 == '\n')
520 return 2;
521
522 if (byte1 == '\r' || byte1 == '\n')
523 return 1;
524
525 return 0;
526 }
527
ReadStream(std::unique_ptr<CPDF_Dictionary> pDict)528 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
529 std::unique_ptr<CPDF_Dictionary> pDict) {
530 const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length"));
531 FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
532
533 // Locate the start of stream.
534 ToNextLine();
535 FX_FILESIZE streamStartPos = m_Pos;
536
537 const ByteStringView kEndStreamStr("endstream");
538 const ByteStringView kEndObjStr("endobj");
539
540 bool bSearchForKeyword = true;
541 if (len >= 0) {
542 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
543 pos += len;
544 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
545 m_Pos = pos.ValueOrDie();
546
547 m_Pos += ReadEOLMarkers(m_Pos);
548 memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
549 GetNextWordInternal(nullptr);
550 // Earlier version of PDF specification doesn't require EOL marker before
551 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
552 // specified length, it signals the end of stream.
553 if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
554 kEndStreamStr.GetLength()) == 0) {
555 bSearchForKeyword = false;
556 }
557 }
558
559 if (bSearchForKeyword) {
560 // If len is not available, len needs to be calculated
561 // by searching the keywords "endstream" or "endobj".
562 m_Pos = streamStartPos;
563 FX_FILESIZE endStreamOffset = 0;
564 while (endStreamOffset >= 0) {
565 endStreamOffset = FindTag(kEndStreamStr, 0);
566
567 // Can't find "endstream".
568 if (endStreamOffset < 0)
569 break;
570
571 // Stop searching when "endstream" is found.
572 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
573 kEndStreamStr, true)) {
574 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
575 break;
576 }
577 }
578
579 m_Pos = streamStartPos;
580 FX_FILESIZE endObjOffset = 0;
581 while (endObjOffset >= 0) {
582 endObjOffset = FindTag(kEndObjStr, 0);
583
584 // Can't find "endobj".
585 if (endObjOffset < 0)
586 break;
587
588 // Stop searching when "endobj" is found.
589 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
590 true)) {
591 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
592 break;
593 }
594 }
595
596 // Can't find "endstream" or "endobj".
597 if (endStreamOffset < 0 && endObjOffset < 0)
598 return nullptr;
599
600 if (endStreamOffset < 0 && endObjOffset >= 0) {
601 // Correct the position of end stream.
602 endStreamOffset = endObjOffset;
603 } else if (endStreamOffset >= 0 && endObjOffset < 0) {
604 // Correct the position of end obj.
605 endObjOffset = endStreamOffset;
606 } else if (endStreamOffset > endObjOffset) {
607 endStreamOffset = endObjOffset;
608 }
609 len = endStreamOffset;
610
611 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
612 if (numMarkers == 2) {
613 len -= 2;
614 } else {
615 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
616 if (numMarkers == 1) {
617 len -= 1;
618 }
619 }
620 if (len < 0)
621 return nullptr;
622
623 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
624 }
625 m_Pos = streamStartPos;
626
627 // Read up to the end of the buffer. Note, we allow zero length streams as
628 // we need to pass them through when we are importing pages into a new
629 // document.
630 len = std::min(len, m_FileLen - m_Pos - m_HeaderOffset);
631 if (len < 0)
632 return nullptr;
633
634 std::unique_ptr<uint8_t, FxFreeDeleter> pData;
635 if (len > 0) {
636 pData.reset(FX_Alloc(uint8_t, len));
637 ReadBlock(pData.get(), len);
638 }
639 auto pStream =
640 pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
641 streamStartPos = m_Pos;
642 memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
643 GetNextWordInternal(nullptr);
644
645 int numMarkers = ReadEOLMarkers(m_Pos);
646 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
647 numMarkers != 0 &&
648 memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
649 m_Pos = streamStartPos;
650 }
651 return pStream;
652 }
653
InitParser(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t HeaderOffset)654 void CPDF_SyntaxParser::InitParser(
655 const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
656 uint32_t HeaderOffset) {
657 ASSERT(pFileAccess);
658 return InitParserWithValidator(
659 pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
660 HeaderOffset);
661 }
662
InitParserWithValidator(const RetainPtr<CPDF_ReadValidator> & validator,uint32_t HeaderOffset)663 void CPDF_SyntaxParser::InitParserWithValidator(
664 const RetainPtr<CPDF_ReadValidator>& validator,
665 uint32_t HeaderOffset) {
666 ASSERT(validator);
667 m_pFileBuf.clear();
668 m_HeaderOffset = HeaderOffset;
669 m_FileLen = validator->GetSize();
670 m_Pos = 0;
671 m_pFileAccess = validator;
672 m_BufOffset = 0;
673 }
674
GetDirectNum()675 uint32_t CPDF_SyntaxParser::GetDirectNum() {
676 bool bIsNumber;
677 GetNextWordInternal(&bIsNumber);
678 if (!bIsNumber)
679 return 0;
680
681 m_WordBuffer[m_WordSize] = 0;
682 return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer));
683 }
684
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const ByteStringView & tag,bool checkKeyword)685 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
686 FX_FILESIZE limit,
687 const ByteStringView& tag,
688 bool checkKeyword) {
689 const uint32_t taglen = tag.GetLength();
690
691 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
692 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
693 !PDFCharIsWhitespace(tag[taglen - 1]);
694
695 uint8_t ch;
696 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
697 GetCharAt(startpos + (int32_t)taglen, ch)) {
698 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
699 (checkKeyword && PDFCharIsDelimiter(ch))) {
700 return false;
701 }
702 }
703
704 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
705 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
706 (checkKeyword && PDFCharIsDelimiter(ch))) {
707 return false;
708 }
709 }
710 return true;
711 }
712
BackwardsSearchToWord(const ByteStringView & tag,FX_FILESIZE limit)713 bool CPDF_SyntaxParser::BackwardsSearchToWord(const ByteStringView& tag,
714 FX_FILESIZE limit) {
715 int32_t taglen = tag.GetLength();
716 if (taglen == 0)
717 return false;
718
719 FX_FILESIZE pos = m_Pos;
720 int32_t offset = taglen - 1;
721 while (1) {
722 if (limit && pos <= m_Pos - limit)
723 return false;
724
725 uint8_t byte;
726 if (!GetCharAtBackward(pos, &byte))
727 return false;
728
729 if (byte == tag[offset]) {
730 offset--;
731 if (offset >= 0) {
732 pos--;
733 continue;
734 }
735 if (IsWholeWord(pos, limit, tag, false)) {
736 m_Pos = pos;
737 return true;
738 }
739 }
740 offset = byte == tag[taglen - 1] ? taglen - 2 : taglen - 1;
741 pos--;
742 if (pos < 0)
743 return false;
744 }
745 }
746
FindTag(const ByteStringView & tag,FX_FILESIZE limit)747 FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag,
748 FX_FILESIZE limit) {
749 int32_t taglen = tag.GetLength();
750 int32_t match = 0;
751 limit += m_Pos;
752 FX_FILESIZE startpos = m_Pos;
753
754 while (1) {
755 uint8_t ch;
756 if (!GetNextChar(ch))
757 return -1;
758
759 if (ch == tag[match]) {
760 match++;
761 if (match == taglen)
762 return m_Pos - startpos - taglen;
763 } else {
764 match = ch == tag[0] ? 1 : 0;
765 }
766
767 if (limit && m_Pos == limit)
768 return -1;
769 }
770 return -1;
771 }
772
GetFileAccess() const773 RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const {
774 return m_pFileAccess;
775 }
776
IsPositionRead(FX_FILESIZE pos) const777 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
778 return m_BufOffset <= pos &&
779 pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
780 }
781