1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8
9 #include <algorithm>
10 #include <sstream>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_boolean.h"
16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_null.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_read_validator.h"
22 #include "core/fpdfapi/parser/cpdf_reference.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_string.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcrt/autorestorer.h"
27 #include "core/fxcrt/cfx_binarybuf.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/numerics/safe_math.h"
31 #include "third_party/base/ptr_util.h"
32
33 namespace {
34
// States of the literal-string scanner in CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  Normal,          // Not inside an escape sequence.
  Backslash,       // A '\' has just been consumed.
  Octal,           // The first octal digit after a '\' has been consumed.
  FinishOctal,     // The second octal digit after a '\' has been consumed.
  CarriageReturn,  // A '\' followed by '\r' has been consumed.
};
36
37 class ReadableSubStream final : public IFX_SeekableReadStream {
38 public:
ReadableSubStream(const RetainPtr<IFX_SeekableReadStream> & pFileRead,FX_FILESIZE part_offset,FX_FILESIZE part_size)39 ReadableSubStream(const RetainPtr<IFX_SeekableReadStream>& pFileRead,
40 FX_FILESIZE part_offset,
41 FX_FILESIZE part_size)
42 : m_pFileRead(pFileRead),
43 m_PartOffset(part_offset),
44 m_PartSize(part_size) {}
45
46 ~ReadableSubStream() override = default;
47
48 // IFX_SeekableReadStream overrides:
ReadBlockAtOffset(void * buffer,FX_FILESIZE offset,size_t size)49 bool ReadBlockAtOffset(void* buffer,
50 FX_FILESIZE offset,
51 size_t size) override {
52 FX_SAFE_FILESIZE safe_end = offset;
53 safe_end += size;
54 // Check that requested range is valid, to prevent calling of ReadBlock
55 // of original m_pFileRead with incorrect params.
56 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_PartSize)
57 return false;
58
59 return m_pFileRead->ReadBlockAtOffset(buffer, m_PartOffset + offset, size);
60 }
61
GetSize()62 FX_FILESIZE GetSize() override { return m_PartSize; }
63
64 private:
65 RetainPtr<IFX_SeekableReadStream> m_pFileRead;
66 FX_FILESIZE m_PartOffset;
67 FX_FILESIZE m_PartSize;
68 };
69
70 } // namespace
71
// static
// Nesting depth of the GetObjectBodyInternal() calls currently in progress.
// That function increments it on entry, restores it on exit, and refuses to
// parse once it exceeds kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
74
75 // static
CreateForTesting(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,FX_FILESIZE HeaderOffset)76 std::unique_ptr<CPDF_SyntaxParser> CPDF_SyntaxParser::CreateForTesting(
77 const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
78 FX_FILESIZE HeaderOffset) {
79 return pdfium::MakeUnique<CPDF_SyntaxParser>(
80 pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
81 HeaderOffset);
82 }
83
CPDF_SyntaxParser(const RetainPtr<IFX_SeekableReadStream> & pFileAccess)84 CPDF_SyntaxParser::CPDF_SyntaxParser(
85 const RetainPtr<IFX_SeekableReadStream>& pFileAccess)
86 : CPDF_SyntaxParser(
87 pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
88 0) {}
89
CPDF_SyntaxParser(const RetainPtr<CPDF_ReadValidator> & validator,FX_FILESIZE HeaderOffset)90 CPDF_SyntaxParser::CPDF_SyntaxParser(
91 const RetainPtr<CPDF_ReadValidator>& validator,
92 FX_FILESIZE HeaderOffset)
93 : m_pFileAccess(validator),
94 m_HeaderOffset(HeaderOffset),
95 m_FileLen(m_pFileAccess->GetSize()) {
96 ASSERT(m_HeaderOffset <= m_FileLen);
97 }
98
99 CPDF_SyntaxParser::~CPDF_SyntaxParser() = default;
100
GetCharAt(FX_FILESIZE pos,uint8_t & ch)101 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
102 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
103 m_Pos = pos;
104 return GetNextChar(ch);
105 }
106
ReadBlockAt(FX_FILESIZE read_pos)107 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
108 if (read_pos >= m_FileLen)
109 return false;
110 size_t read_size = m_ReadBufferSize;
111 FX_SAFE_FILESIZE safe_end = read_pos;
112 safe_end += read_size;
113 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
114 read_size = m_FileLen - read_pos;
115
116 m_pFileBuf.resize(read_size);
117 if (!m_pFileAccess->ReadBlockAtOffset(m_pFileBuf.data(), read_pos,
118 read_size)) {
119 m_pFileBuf.clear();
120 return false;
121 }
122
123 m_BufOffset = read_pos;
124 return true;
125 }
126
GetNextChar(uint8_t & ch)127 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
128 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
129 if (pos >= m_FileLen)
130 return false;
131
132 if (!IsPositionRead(pos) && !ReadBlockAt(pos))
133 return false;
134
135 ch = m_pFileBuf[pos - m_BufOffset];
136 m_Pos++;
137 return true;
138 }
139
GetDocumentSize() const140 FX_FILESIZE CPDF_SyntaxParser::GetDocumentSize() const {
141 return m_FileLen - m_HeaderOffset;
142 }
143
GetCharAtBackward(FX_FILESIZE pos,uint8_t * ch)144 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
145 pos += m_HeaderOffset;
146 if (pos >= m_FileLen)
147 return false;
148
149 if (!IsPositionRead(pos)) {
150 FX_FILESIZE block_start = 0;
151 if (pos >= CPDF_Stream::kFileBufSize)
152 block_start = pos - CPDF_Stream::kFileBufSize + 1;
153 if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
154 return false;
155 }
156 *ch = m_pFileBuf[pos - m_BufOffset];
157 return true;
158 }
159
ReadBlock(uint8_t * pBuf,uint32_t size)160 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
161 if (!m_pFileAccess->ReadBlockAtOffset(pBuf, m_Pos + m_HeaderOffset, size))
162 return false;
163 m_Pos += size;
164 return true;
165 }
166
GetNextWordInternal(bool * bIsNumber)167 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
168 m_WordSize = 0;
169 if (bIsNumber)
170 *bIsNumber = true;
171
172 ToNextWord();
173 uint8_t ch;
174 if (!GetNextChar(ch))
175 return;
176
177 if (PDFCharIsDelimiter(ch)) {
178 if (bIsNumber)
179 *bIsNumber = false;
180
181 m_WordBuffer[m_WordSize++] = ch;
182 if (ch == '/') {
183 while (1) {
184 if (!GetNextChar(ch))
185 return;
186
187 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
188 m_Pos--;
189 return;
190 }
191
192 if (m_WordSize < sizeof(m_WordBuffer) - 1)
193 m_WordBuffer[m_WordSize++] = ch;
194 }
195 } else if (ch == '<') {
196 if (!GetNextChar(ch))
197 return;
198
199 if (ch == '<')
200 m_WordBuffer[m_WordSize++] = ch;
201 else
202 m_Pos--;
203 } else if (ch == '>') {
204 if (!GetNextChar(ch))
205 return;
206
207 if (ch == '>')
208 m_WordBuffer[m_WordSize++] = ch;
209 else
210 m_Pos--;
211 }
212 return;
213 }
214
215 while (1) {
216 if (m_WordSize < sizeof(m_WordBuffer) - 1)
217 m_WordBuffer[m_WordSize++] = ch;
218
219 if (!PDFCharIsNumeric(ch)) {
220 if (bIsNumber)
221 *bIsNumber = false;
222 }
223
224 if (!GetNextChar(ch))
225 return;
226
227 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
228 m_Pos--;
229 break;
230 }
231 }
232 }
233
ReadString()234 ByteString CPDF_SyntaxParser::ReadString() {
235 uint8_t ch;
236 if (!GetNextChar(ch))
237 return ByteString();
238
239 std::ostringstream buf;
240 int32_t parlevel = 0;
241 ReadStatus status = ReadStatus::Normal;
242 int32_t iEscCode = 0;
243 while (1) {
244 switch (status) {
245 case ReadStatus::Normal:
246 if (ch == ')') {
247 if (parlevel == 0)
248 return ByteString(buf);
249 parlevel--;
250 } else if (ch == '(') {
251 parlevel++;
252 }
253 if (ch == '\\')
254 status = ReadStatus::Backslash;
255 else
256 buf << static_cast<char>(ch);
257 break;
258 case ReadStatus::Backslash:
259 if (FXSYS_IsOctalDigit(ch)) {
260 iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
261 status = ReadStatus::Octal;
262 break;
263 }
264
265 if (ch == '\r') {
266 status = ReadStatus::CarriageReturn;
267 break;
268 }
269 if (ch == 'n') {
270 buf << '\n';
271 } else if (ch == 'r') {
272 buf << '\r';
273 } else if (ch == 't') {
274 buf << '\t';
275 } else if (ch == 'b') {
276 buf << '\b';
277 } else if (ch == 'f') {
278 buf << '\f';
279 } else if (ch != '\n') {
280 buf << static_cast<char>(ch);
281 }
282 status = ReadStatus::Normal;
283 break;
284 case ReadStatus::Octal:
285 if (FXSYS_IsOctalDigit(ch)) {
286 iEscCode =
287 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
288 status = ReadStatus::FinishOctal;
289 } else {
290 buf << static_cast<char>(iEscCode);
291 status = ReadStatus::Normal;
292 continue;
293 }
294 break;
295 case ReadStatus::FinishOctal:
296 status = ReadStatus::Normal;
297 if (FXSYS_IsOctalDigit(ch)) {
298 iEscCode =
299 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
300 buf << static_cast<char>(iEscCode);
301 } else {
302 buf << static_cast<char>(iEscCode);
303 continue;
304 }
305 break;
306 case ReadStatus::CarriageReturn:
307 status = ReadStatus::Normal;
308 if (ch != '\n')
309 continue;
310 break;
311 }
312
313 if (!GetNextChar(ch))
314 break;
315 }
316
317 GetNextChar(ch);
318 return ByteString(buf);
319 }
320
ReadHexString()321 ByteString CPDF_SyntaxParser::ReadHexString() {
322 uint8_t ch;
323 if (!GetNextChar(ch))
324 return ByteString();
325
326 std::ostringstream buf;
327 bool bFirst = true;
328 uint8_t code = 0;
329 while (1) {
330 if (ch == '>')
331 break;
332
333 if (std::isxdigit(ch)) {
334 int val = FXSYS_HexCharToInt(ch);
335 if (bFirst) {
336 code = val * 16;
337 } else {
338 code += val;
339 buf << static_cast<char>(code);
340 }
341 bFirst = !bFirst;
342 }
343
344 if (!GetNextChar(ch))
345 break;
346 }
347 if (!bFirst)
348 buf << static_cast<char>(code);
349
350 return ByteString(buf);
351 }
352
ToNextLine()353 void CPDF_SyntaxParser::ToNextLine() {
354 uint8_t ch;
355 while (GetNextChar(ch)) {
356 if (ch == '\n')
357 break;
358
359 if (ch == '\r') {
360 GetNextChar(ch);
361 if (ch != '\n')
362 --m_Pos;
363 break;
364 }
365 }
366 }
367
ToNextWord()368 void CPDF_SyntaxParser::ToNextWord() {
369 uint8_t ch;
370 if (!GetNextChar(ch))
371 return;
372
373 while (1) {
374 while (PDFCharIsWhitespace(ch)) {
375 if (!GetNextChar(ch))
376 return;
377 }
378
379 if (ch != '%')
380 break;
381
382 while (1) {
383 if (!GetNextChar(ch))
384 return;
385 if (PDFCharIsLineEnding(ch))
386 break;
387 }
388 }
389 m_Pos--;
390 }
391
GetNextWord(bool * bIsNumber)392 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
393 const CPDF_ReadValidator::Session read_session(GetValidator());
394 GetNextWordInternal(bIsNumber);
395 ByteString ret;
396 if (!GetValidator()->has_read_problems())
397 ret = ByteString(m_WordBuffer, m_WordSize);
398 return ret;
399 }
400
PeekNextWord(bool * bIsNumber)401 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) {
402 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
403 return GetNextWord(bIsNumber);
404 }
405
GetKeyword()406 ByteString CPDF_SyntaxParser::GetKeyword() {
407 return GetNextWord(nullptr);
408 }
409
SetPos(FX_FILESIZE pos)410 void CPDF_SyntaxParser::SetPos(FX_FILESIZE pos) {
411 m_Pos = std::min(pos, m_FileLen);
412 }
413
GetObjectBody(CPDF_IndirectObjectHolder * pObjList)414 RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
415 CPDF_IndirectObjectHolder* pObjList) {
416 const CPDF_ReadValidator::Session read_session(GetValidator());
417 auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose);
418 if (GetValidator()->has_read_problems())
419 return nullptr;
420 return result;
421 }
422
GetObjectBodyInternal(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)423 RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
424 CPDF_IndirectObjectHolder* pObjList,
425 ParseType parse_type) {
426 AutoRestorer<int> depth_restorer(&s_CurrentRecursionDepth);
427 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
428 return nullptr;
429
430 FX_FILESIZE SavedObjPos = m_Pos;
431 bool bIsNumber;
432 ByteString word = GetNextWord(&bIsNumber);
433 if (word.IsEmpty())
434 return nullptr;
435
436 if (bIsNumber) {
437 AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
438 ByteString nextword = GetNextWord(&bIsNumber);
439 if (!bIsNumber)
440 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
441
442 ByteString nextword2 = GetNextWord(nullptr);
443 if (nextword2 != "R")
444 return pdfium::MakeRetain<CPDF_Number>(word.AsStringView());
445
446 pos_restorer.AbandonRestoration();
447 uint32_t refnum = FXSYS_atoui(word.c_str());
448 if (refnum == CPDF_Object::kInvalidObjNum)
449 return nullptr;
450
451 return pdfium::MakeRetain<CPDF_Reference>(pObjList, refnum);
452 }
453
454 if (word == "true" || word == "false")
455 return pdfium::MakeRetain<CPDF_Boolean>(word == "true");
456
457 if (word == "null")
458 return pdfium::MakeRetain<CPDF_Null>();
459
460 if (word == "(") {
461 ByteString str = ReadString();
462 return pdfium::MakeRetain<CPDF_String>(m_pPool, str, false);
463 }
464 if (word == "<") {
465 ByteString str = ReadHexString();
466 return pdfium::MakeRetain<CPDF_String>(m_pPool, str, true);
467 }
468 if (word == "[") {
469 auto pArray = pdfium::MakeRetain<CPDF_Array>();
470 while (RetainPtr<CPDF_Object> pObj =
471 GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
472 pArray->Add(std::move(pObj));
473 }
474 return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
475 ? std::move(pArray)
476 : nullptr;
477 }
478 if (word[0] == '/') {
479 return pdfium::MakeRetain<CPDF_Name>(
480 m_pPool,
481 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
482 }
483 if (word == "<<") {
484 RetainPtr<CPDF_Dictionary> pDict =
485 pdfium::MakeRetain<CPDF_Dictionary>(m_pPool);
486 while (1) {
487 ByteString inner_word = GetNextWord(nullptr);
488 if (inner_word.IsEmpty())
489 return nullptr;
490
491 FX_FILESIZE SavedPos = m_Pos - inner_word.GetLength();
492 if (inner_word == ">>")
493 break;
494
495 if (inner_word == "endobj") {
496 m_Pos = SavedPos;
497 break;
498 }
499 if (inner_word[0] != '/')
500 continue;
501
502 ByteString key = PDF_NameDecode(inner_word.AsStringView());
503 if (key.IsEmpty() && parse_type == ParseType::kLoose)
504 continue;
505
506 RetainPtr<CPDF_Object> pObj =
507 GetObjectBodyInternal(pObjList, ParseType::kLoose);
508 if (!pObj) {
509 if (parse_type == ParseType::kLoose)
510 continue;
511
512 ToNextLine();
513 return nullptr;
514 }
515
516 if (!key.IsEmpty()) {
517 ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
518 pDict->SetFor(keyNoSlash, std::move(pObj));
519 }
520 }
521
522 AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
523 if (GetNextWord(nullptr) != "stream")
524 return pDict;
525 pos_restorer.AbandonRestoration();
526 return ReadStream(std::move(pDict));
527 }
528 if (word == ">>")
529 m_Pos = SavedObjPos;
530
531 return nullptr;
532 }
533
GetIndirectObject(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)534 RetainPtr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
535 CPDF_IndirectObjectHolder* pObjList,
536 ParseType parse_type) {
537 const CPDF_ReadValidator::Session read_session(GetValidator());
538 const FX_FILESIZE saved_pos = GetPos();
539 bool is_number = false;
540 ByteString word = GetNextWord(&is_number);
541 if (!is_number || word.IsEmpty()) {
542 SetPos(saved_pos);
543 return nullptr;
544 }
545 const uint32_t parser_objnum = FXSYS_atoui(word.c_str());
546
547 word = GetNextWord(&is_number);
548 if (!is_number || word.IsEmpty()) {
549 SetPos(saved_pos);
550 return nullptr;
551 }
552 const uint32_t parser_gennum = FXSYS_atoui(word.c_str());
553
554 if (GetKeyword() != "obj") {
555 SetPos(saved_pos);
556 return nullptr;
557 }
558
559 RetainPtr<CPDF_Object> pObj = GetObjectBodyInternal(pObjList, parse_type);
560 if (pObj) {
561 pObj->SetObjNum(parser_objnum);
562 pObj->SetGenNum(parser_gennum);
563 }
564
565 return GetValidator()->has_read_problems() ? nullptr : std::move(pObj);
566 }
567
ReadEOLMarkers(FX_FILESIZE pos)568 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
569 unsigned char byte1 = 0;
570 unsigned char byte2 = 0;
571
572 GetCharAt(pos, byte1);
573 GetCharAt(pos + 1, byte2);
574
575 if (byte1 == '\r' && byte2 == '\n')
576 return 2;
577
578 if (byte1 == '\r' || byte1 == '\n')
579 return 1;
580
581 return 0;
582 }
583
FindWordPos(ByteStringView word)584 FX_FILESIZE CPDF_SyntaxParser::FindWordPos(ByteStringView word) {
585 AutoRestorer<FX_FILESIZE> pos_restorer(&m_Pos);
586 FX_FILESIZE end_offset = FindTag(word);
587 while (end_offset >= 0) {
588 // Stop searching when word is found.
589 if (IsWholeWord(GetPos() - word.GetLength(), m_FileLen, word, true))
590 return GetPos() - word.GetLength();
591
592 end_offset = FindTag(word);
593 }
594 return -1;
595 }
596
FindStreamEndPos()597 FX_FILESIZE CPDF_SyntaxParser::FindStreamEndPos() {
598 const ByteStringView kEndStreamStr("endstream");
599 const ByteStringView kEndObjStr("endobj");
600
601 FX_FILESIZE endStreamWordOffset = FindWordPos(kEndStreamStr);
602 FX_FILESIZE endObjWordOffset = FindWordPos(kEndObjStr);
603
604 // Can't find "endstream" or "endobj".
605 if (endStreamWordOffset < 0 && endObjWordOffset < 0) {
606 return -1;
607 }
608
609 if (endStreamWordOffset < 0 && endObjWordOffset >= 0) {
610 // Correct the position of end stream.
611 endStreamWordOffset = endObjWordOffset;
612 } else if (endStreamWordOffset >= 0 && endObjWordOffset < 0) {
613 // Correct the position of end obj.
614 endObjWordOffset = endStreamWordOffset;
615 } else if (endStreamWordOffset > endObjWordOffset) {
616 endStreamWordOffset = endObjWordOffset;
617 }
618
619 int numMarkers = ReadEOLMarkers(endStreamWordOffset - 2);
620 if (numMarkers == 2) {
621 endStreamWordOffset -= 2;
622 } else {
623 numMarkers = ReadEOLMarkers(endStreamWordOffset - 1);
624 if (numMarkers == 1) {
625 endStreamWordOffset -= 1;
626 }
627 }
628 if (endStreamWordOffset < GetPos()) {
629 return -1;
630 }
631 return endStreamWordOffset;
632 }
633
ReadStream(RetainPtr<CPDF_Dictionary> pDict)634 RetainPtr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
635 RetainPtr<CPDF_Dictionary> pDict) {
636 const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length"));
637 FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
638
639 // Locate the start of stream.
640 ToNextLine();
641 const FX_FILESIZE streamStartPos = GetPos();
642
643 if (len > 0) {
644 FX_SAFE_FILESIZE pos = GetPos();
645 pos += len;
646 if (!pos.IsValid() || pos.ValueOrDie() >= m_FileLen)
647 len = -1;
648 }
649
650 RetainPtr<IFX_SeekableReadStream> data;
651 if (len > 0) {
652 // Check data availability first to allow the Validator to request data
653 // smoothly, without jumps.
654 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
655 m_HeaderOffset + GetPos(), len)) {
656 return nullptr;
657 }
658
659 data = pdfium::MakeRetain<ReadableSubStream>(
660 GetValidator(), m_HeaderOffset + GetPos(), len);
661 SetPos(GetPos() + len);
662 }
663
664 const ByteStringView kEndStreamStr("endstream");
665 const ByteStringView kEndObjStr("endobj");
666
667 // Note, we allow zero length streams as we need to pass them through when we
668 // are importing pages into a new document.
669 if (len >= 0) {
670 const CPDF_ReadValidator::Session read_session(GetValidator());
671 m_Pos += ReadEOLMarkers(GetPos());
672 memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
673 GetNextWordInternal(nullptr);
674 if (GetValidator()->has_read_problems())
675 return nullptr;
676
677 // Earlier version of PDF specification doesn't require EOL marker before
678 // 'endstream' keyword. If keyword 'endstream' follows the bytes in
679 // specified length, it signals the end of stream.
680 if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
681 kEndStreamStr.GetLength()) != 0) {
682 data.Reset();
683 len = -1;
684 SetPos(streamStartPos);
685 }
686 }
687
688 if (len < 0) {
689 // If len is not available or incorrect, len needs to be calculated
690 // by searching the keywords "endstream" or "endobj".
691 const FX_FILESIZE streamEndPos = FindStreamEndPos();
692 if (streamEndPos < 0)
693 return nullptr;
694
695 len = streamEndPos - streamStartPos;
696 ASSERT(len >= 0);
697 if (len > 0) {
698 SetPos(streamStartPos);
699 // Check data availability first to allow the Validator to request data
700 // smoothly, without jumps.
701 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
702 m_HeaderOffset + GetPos(), len)) {
703 return nullptr;
704 }
705
706 data = pdfium::MakeRetain<ReadableSubStream>(
707 GetValidator(), m_HeaderOffset + GetPos(), len);
708 SetPos(GetPos() + len);
709 }
710 }
711
712 auto pStream = pdfium::MakeRetain<CPDF_Stream>();
713 if (data) {
714 pStream->InitStreamFromFile(data, std::move(pDict));
715 } else {
716 DCHECK(!len);
717 pStream->InitStream({}, std::move(pDict)); // Empty stream
718 }
719 const FX_FILESIZE end_stream_offset = GetPos();
720 memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
721 GetNextWordInternal(nullptr);
722
723 int numMarkers = ReadEOLMarkers(GetPos());
724 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
725 numMarkers != 0 &&
726 memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
727 SetPos(end_stream_offset);
728 }
729 return pStream;
730 }
731
GetDirectNum()732 uint32_t CPDF_SyntaxParser::GetDirectNum() {
733 bool bIsNumber;
734 GetNextWordInternal(&bIsNumber);
735 if (!bIsNumber)
736 return 0;
737
738 m_WordBuffer[m_WordSize] = 0;
739 return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer));
740 }
741
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,ByteStringView tag,bool checkKeyword)742 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
743 FX_FILESIZE limit,
744 ByteStringView tag,
745 bool checkKeyword) {
746 const uint32_t taglen = tag.GetLength();
747
748 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
749 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
750 !PDFCharIsWhitespace(tag[taglen - 1]);
751
752 uint8_t ch;
753 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
754 GetCharAt(startpos + (int32_t)taglen, ch)) {
755 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
756 (checkKeyword && PDFCharIsDelimiter(ch))) {
757 return false;
758 }
759 }
760
761 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
762 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
763 (checkKeyword && PDFCharIsDelimiter(ch))) {
764 return false;
765 }
766 }
767 return true;
768 }
769
BackwardsSearchToWord(ByteStringView word,FX_FILESIZE limit)770 bool CPDF_SyntaxParser::BackwardsSearchToWord(ByteStringView word,
771 FX_FILESIZE limit) {
772 int32_t taglen = word.GetLength();
773 if (taglen == 0)
774 return false;
775
776 FX_FILESIZE pos = m_Pos;
777 int32_t offset = taglen - 1;
778 while (1) {
779 if (limit && pos <= m_Pos - limit)
780 return false;
781
782 uint8_t byte;
783 if (!GetCharAtBackward(pos, &byte))
784 return false;
785
786 if (byte == word[offset]) {
787 offset--;
788 if (offset >= 0) {
789 pos--;
790 continue;
791 }
792 if (IsWholeWord(pos, limit, word, false)) {
793 m_Pos = pos;
794 return true;
795 }
796 }
797 offset = byte == word[taglen - 1] ? taglen - 2 : taglen - 1;
798 pos--;
799 if (pos < 0)
800 return false;
801 }
802 }
803
FindTag(ByteStringView tag)804 FX_FILESIZE CPDF_SyntaxParser::FindTag(ByteStringView tag) {
805 const FX_FILESIZE startpos = GetPos();
806 const int32_t taglen = tag.GetLength();
807 ASSERT(taglen > 0);
808
809 int32_t match = 0;
810 while (1) {
811 uint8_t ch;
812 if (!GetNextChar(ch))
813 return -1;
814
815 if (ch == tag[match]) {
816 match++;
817 if (match == taglen)
818 return GetPos() - startpos - taglen;
819 } else {
820 match = ch == tag[0] ? 1 : 0;
821 }
822 }
823 return -1;
824 }
825
IsPositionRead(FX_FILESIZE pos) const826 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
827 return m_BufOffset <= pos &&
828 pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
829 }
830