1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_reference.h"
20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
25 #include "core/fxcrt/fx_ext.h"
26 #include "core/fxcrt/fx_safe_types.h"
27 #include "third_party/base/ptr_util.h"
28 #include "third_party/base/stl_util.h"
29
30 namespace {
31
32 // A limit on the size of the xref table. Theoretical limits are higher, but
33 // this may be large enough in practice.
34 const int32_t kMaxXRefSize = 1048576;
35
// Reads the first |n| bytes at |p| as a big-endian unsigned integer.
// A non-positive |n| yields 0. Only the low 32 bits are kept when more than
// four bytes are consumed.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  int32_t index = 0;
  while (index < n) {
    // Make room for the next byte in the low 8 bits, then drop it in.
    value = (value << 8) | p[index];
    ++index;
  }
  return value;
}
42
// Returns the integer stored under the "N" key of |pObjStream|'s dictionary.
// Neither |pObjStream| nor the dictionary it returns is null-checked here.
int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
  return pObjStream->GetDict()->GetIntegerFor("N");
}
46
// Returns the integer stored under the "First" key of |pObjStream|'s
// dictionary. Neither |pObjStream| nor the dictionary it returns is
// null-checked here.
int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
  return pObjStream->GetDict()->GetIntegerFor("First");
}
50
51 } // namespace
52
// Creates a parser with no document attached: every flag is cleared, the
// file version and xref start object number are zero, and no encryption
// dictionary is set. The syntax parser is allocated here so that the other
// members can use |m_pSyntax| without a null check.
CPDF_Parser::CPDF_Parser()
    : m_pDocument(nullptr),
      m_bHasParsed(false),
      m_bXRefStream(false),
      m_bVersionUpdated(false),
      m_FileVersion(0),
      m_pEncryptDict(nullptr),
      m_dwXrefStartObjNum(0) {
  m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>();
}
63
~CPDF_Parser()64 CPDF_Parser::~CPDF_Parser() {
65 ReleaseEncryptHandler();
66 SetEncryptDictionary(nullptr);
67 }
68
GetLastObjNum() const69 uint32_t CPDF_Parser::GetLastObjNum() const {
70 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
71 }
72
IsValidObjectNumber(uint32_t objnum) const73 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
74 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
75 }
76
GetObjectPositionOrZero(uint32_t objnum) const77 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
78 auto it = m_ObjectInfo.find(objnum);
79 return it != m_ObjectInfo.end() ? it->second.pos : 0;
80 }
81
GetObjectType(uint32_t objnum) const82 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const {
83 ASSERT(IsValidObjectNumber(objnum));
84 auto it = m_ObjectInfo.find(objnum);
85 return it != m_ObjectInfo.end() ? it->second.type : 0;
86 }
87
GetObjectGenNum(uint32_t objnum) const88 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
89 ASSERT(IsValidObjectNumber(objnum));
90 auto it = m_ObjectInfo.find(objnum);
91 return it != m_ObjectInfo.end() ? it->second.gennum : 0;
92 }
93
IsObjectFreeOrNull(uint32_t objnum) const94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
95 uint8_t type = GetObjectType(objnum);
96 return type == 0 || type == 255;
97 }
98
// Records |pDict| as the current encryption dictionary. Only the pointer is
// stored; passing nullptr clears it.
void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
  m_pEncryptDict = pDict;
}
102
// Returns the crypto handler currently held by the syntax parser, or nullptr
// if none has been installed. Ownership stays with |m_pSyntax|.
CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
  return m_pSyntax->m_pCryptoHandler.get();
}
106
// Returns a retained pointer to the stream the syntax parser reads from.
CFX_RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
  return m_pSyntax->m_pFileAccess;
}
110
ShrinkObjectMap(uint32_t objnum)111 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
112 if (objnum == 0) {
113 m_ObjectInfo.clear();
114 return;
115 }
116
117 auto it = m_ObjectInfo.lower_bound(objnum);
118 while (it != m_ObjectInfo.end()) {
119 auto saved_it = it++;
120 m_ObjectInfo.erase(saved_it);
121 }
122
123 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
124 m_ObjectInfo[objnum - 1].pos = 0;
125 }
126
// Parses the file behind |pFileAccess| and loads it into |pDocument|.
//
// The steps are: locate the header, read the two version digits, load the
// cross-reference data named by "startxref" (rebuilding it by scanning the
// file when that fails), install the encryption handler, and load the
// document. If the loaded document has no root or no pages, the
// cross-reference data is rebuilt once and the document is loaded again.
//
// Returns SUCCESS, FORMAT_ERROR when the file cannot be made sense of, or
// whatever error SetEncryptHandler() reports. May only be called once per
// parser (asserted).
CPDF_Parser::Error CPDF_Parser::StartParse(
    const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    CPDF_Document* pDocument) {
  ASSERT(!m_bHasParsed);
  m_bHasParsed = true;
  m_bXRefStream = false;
  m_LastXRefOffset = 0;

  int32_t offset = GetHeaderOffset(pFileAccess);
  if (offset == -1)
    return FORMAT_ERROR;

  m_pSyntax->InitParser(pFileAccess, offset);

  // The characters at positions 5 and 7 supply the tens and units of
  // |m_FileVersion| (presumably the X and Y of a "%PDF-X.Y" header --
  // confirm against GetCharAt()). Non-digits are silently skipped.
  uint8_t ch;
  if (!m_pSyntax->GetCharAt(5, ch))
    return FORMAT_ERROR;

  if (std::isdigit(ch))
    m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;

  if (!m_pSyntax->GetCharAt(7, ch))
    return FORMAT_ERROR;

  if (std::isdigit(ch))
    m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));

  // The search below starts 9 bytes before the end of the file, so the file
  // must extend at least that far past the header.
  if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
    return FORMAT_ERROR;

  m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
  m_pDocument = pDocument;

  bool bXRefRebuilt = false;
  // NOTE(review): the SearchWord() arguments presumably request a backward
  // search limited to 4096 bytes -- confirm against its declaration.
  if (m_pSyntax->SearchWord("startxref", true, false, 4096)) {
    m_SortedOffset.insert(m_pSyntax->SavePos());
    m_pSyntax->GetKeyword();

    bool bNumber;
    CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
    if (!bNumber)
      return FORMAT_ERROR;

    // Try a classic xref table first, then an xref stream, and only then
    // fall back to rebuilding by scanning the file.
    m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
    if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
        !LoadAllCrossRefV5(m_LastXRefOffset)) {
      if (!RebuildCrossRef())
        return FORMAT_ERROR;

      bXRefRebuilt = true;
      m_LastXRefOffset = 0;
    }
  } else {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    bXRefRebuilt = true;
  }
  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  m_pDocument->LoadDoc();
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // A rebuild was already attempted; there is nothing more to try.
    if (bXRefRebuilt)
      return FORMAT_ERROR;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;

    // Only the root is required on this second attempt; the page count is
    // not re-checked.
    m_pDocument->LoadDoc();
    if (!m_pDocument->GetRoot())
      return FORMAT_ERROR;
  }
  // The trailer must reference the root indirectly; rebuild once more if it
  // does not.
  if (GetRootObjNum() == 0) {
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }
  // When metadata is not encrypted, tell the syntax parser which object
  // holds it.
  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    CPDF_Reference* pMetadata =
        ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
    if (pMetadata)
      m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
SetEncryptHandler()223 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
224 ReleaseEncryptHandler();
225 SetEncryptDictionary(nullptr);
226
227 if (!m_pTrailer)
228 return FORMAT_ERROR;
229
230 CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt");
231 if (pEncryptObj) {
232 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
233 SetEncryptDictionary(pEncryptDict);
234 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
235 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
236 if (pEncryptObj)
237 SetEncryptDictionary(pEncryptObj->GetDict());
238 }
239 }
240
241 if (m_pEncryptDict) {
242 CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter");
243 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
244 Error err = HANDLER_ERROR;
245 if (filter == "Standard") {
246 pSecurityHandler = pdfium::MakeUnique<CPDF_SecurityHandler>();
247 err = PASSWORD_ERROR;
248 }
249 if (!pSecurityHandler)
250 return HANDLER_ERROR;
251
252 if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
253 return err;
254
255 m_pSecurityHandler = std::move(pSecurityHandler);
256 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
257 m_pSecurityHandler->CreateCryptoHandler());
258 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
259 return HANDLER_ERROR;
260 m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
261 }
262 return SUCCESS;
263 }
264
// Destroys the crypto handler held by the syntax parser and then the
// security handler held by this parser. The encryption dictionary pointer is
// left untouched.
void CPDF_Parser::ReleaseEncryptHandler() {
  m_pSyntax->m_pCryptoHandler.reset();
  m_pSecurityHandler.reset();
}
269
GetObjectOffset(uint32_t objnum) const270 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
271 if (!IsValidObjectNumber(objnum))
272 return 0;
273
274 if (GetObjectType(objnum) == 1)
275 return GetObjectPositionOrZero(objnum);
276
277 if (GetObjectType(objnum) == 2) {
278 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
279 return GetObjectPositionOrZero(pos);
280 }
281 return 0;
282 }
283
// Ideally, all the cross reference entries should be verified.
// In reality, we rarely see well-formed cross references that don't match
// the objects. crbug/602650 showed a case where object numbers
// in the cross reference table are all off by one.
VerifyCrossRefV4()288 bool CPDF_Parser::VerifyCrossRefV4() {
289 for (const auto& it : m_ObjectInfo) {
290 if (it.second.pos == 0)
291 continue;
292 // Find the first non-zero position.
293 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
294 m_pSyntax->RestorePos(it.second.pos);
295 bool is_num = false;
296 CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num);
297 m_pSyntax->RestorePos(SavedPos);
298 if (!is_num || num_str.IsEmpty() ||
299 FXSYS_atoui(num_str.c_str()) != it.first) {
300 // If the object number read doesn't match the one stored,
301 // something is wrong with the cross reference table.
302 return false;
303 } else {
304 return true;
305 }
306 }
307 return true;
308 }
309
LoadAllCrossRefV4(FX_FILESIZE xrefpos)310 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
311 if (!LoadCrossRefV4(xrefpos, 0, true))
312 return false;
313
314 m_pTrailer = LoadTrailerV4();
315 if (!m_pTrailer)
316 return false;
317
318 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
319 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
320 ShrinkObjectMap(xrefsize);
321
322 std::vector<FX_FILESIZE> CrossRefList;
323 std::vector<FX_FILESIZE> XRefStreamList;
324 std::set<FX_FILESIZE> seen_xrefpos;
325
326 CrossRefList.push_back(xrefpos);
327 XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
328 seen_xrefpos.insert(xrefpos);
329
330 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
331 // numerical, GetDirectInteger() returns 0. Loading will end.
332 xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
333 while (xrefpos) {
334 // Check for circular references.
335 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
336 return false;
337
338 seen_xrefpos.insert(xrefpos);
339
340 // SLOW ...
341 CrossRefList.insert(CrossRefList.begin(), xrefpos);
342 LoadCrossRefV4(xrefpos, 0, true);
343
344 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
345 if (!pDict)
346 return false;
347
348 xrefpos = GetDirectInteger(pDict.get(), "Prev");
349
350 // SLOW ...
351 XRefStreamList.insert(XRefStreamList.begin(),
352 pDict->GetIntegerFor("XRefStm"));
353 m_Trailers.push_back(std::move(pDict));
354 }
355
356 for (size_t i = 0; i < CrossRefList.size(); ++i) {
357 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
358 return false;
359 if (i == 0 && !VerifyCrossRefV4())
360 return false;
361 }
362 return true;
363 }
364
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,uint32_t dwObjCount)365 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
366 uint32_t dwObjCount) {
367 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
368 return false;
369
370 m_pTrailer = LoadTrailerV4();
371 if (!m_pTrailer)
372 return false;
373
374 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
375 if (xrefsize == 0)
376 return false;
377
378 std::vector<FX_FILESIZE> CrossRefList;
379 std::vector<FX_FILESIZE> XRefStreamList;
380 std::set<FX_FILESIZE> seen_xrefpos;
381
382 CrossRefList.push_back(xrefpos);
383 XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
384 seen_xrefpos.insert(xrefpos);
385
386 xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
387 while (xrefpos) {
388 // Check for circular references.
389 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
390 return false;
391
392 seen_xrefpos.insert(xrefpos);
393
394 // SLOW ...
395 CrossRefList.insert(CrossRefList.begin(), xrefpos);
396 LoadCrossRefV4(xrefpos, 0, true);
397
398 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
399 if (!pDict)
400 return false;
401
402 xrefpos = GetDirectInteger(pDict.get(), "Prev");
403
404 // SLOW ...
405 XRefStreamList.insert(XRefStreamList.begin(),
406 pDict->GetIntegerFor("XRefStm"));
407 m_Trailers.push_back(std::move(pDict));
408 }
409
410 for (size_t i = 1; i < CrossRefList.size(); ++i) {
411 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
412 return false;
413 }
414 return true;
415 }
416
LoadLinearizedCrossRefV4(FX_FILESIZE pos,uint32_t dwObjCount)417 bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
418 uint32_t dwObjCount) {
419 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
420
421 m_pSyntax->RestorePos(dwStartPos);
422 m_SortedOffset.insert(pos);
423
424 uint32_t start_objnum = 0;
425 uint32_t count = dwObjCount;
426 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
427
428 const int32_t recordsize = 20;
429 std::vector<char> buf(1024 * recordsize + 1);
430 buf[1024 * recordsize] = '\0';
431
432 int32_t nBlocks = count / 1024 + 1;
433 for (int32_t block = 0; block < nBlocks; block++) {
434 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
435 uint32_t dwReadSize = block_size * recordsize;
436 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
437 return false;
438
439 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
440 dwReadSize)) {
441 return false;
442 }
443
444 for (int32_t i = 0; i < block_size; i++) {
445 uint32_t objnum = start_objnum + block * 1024 + i;
446 char* pEntry = &buf[i * recordsize];
447 if (pEntry[17] == 'f') {
448 m_ObjectInfo[objnum].pos = 0;
449 m_ObjectInfo[objnum].type = 0;
450 } else {
451 int32_t offset = FXSYS_atoi(pEntry);
452 if (offset == 0) {
453 for (int32_t c = 0; c < 10; c++) {
454 if (!std::isdigit(pEntry[c]))
455 return false;
456 }
457 }
458
459 m_ObjectInfo[objnum].pos = offset;
460 int32_t version = FXSYS_atoi(pEntry + 11);
461 if (version >= 1)
462 m_bVersionUpdated = true;
463
464 m_ObjectInfo[objnum].gennum = version;
465 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
466 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
467
468 m_ObjectInfo[objnum].type = 1;
469 }
470 }
471 }
472 m_pSyntax->RestorePos(SavedPos + count * recordsize);
473 return true;
474 }
475
LoadCrossRefV4(FX_FILESIZE pos,FX_FILESIZE streampos,bool bSkip)476 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
477 FX_FILESIZE streampos,
478 bool bSkip) {
479 m_pSyntax->RestorePos(pos);
480 if (m_pSyntax->GetKeyword() != "xref")
481 return false;
482
483 m_SortedOffset.insert(pos);
484 if (streampos)
485 m_SortedOffset.insert(streampos);
486
487 while (1) {
488 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
489 bool bIsNumber;
490 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
491 if (word.IsEmpty())
492 return false;
493
494 if (!bIsNumber) {
495 m_pSyntax->RestorePos(SavedPos);
496 break;
497 }
498
499 uint32_t start_objnum = FXSYS_atoui(word.c_str());
500 if (start_objnum >= kMaxObjectNumber)
501 return false;
502
503 uint32_t count = m_pSyntax->GetDirectNum();
504 m_pSyntax->ToNextWord();
505 SavedPos = m_pSyntax->SavePos();
506 const int32_t recordsize = 20;
507
508 m_dwXrefStartObjNum = start_objnum;
509 if (!bSkip) {
510 std::vector<char> buf(1024 * recordsize + 1);
511 buf[1024 * recordsize] = '\0';
512
513 int32_t nBlocks = count / 1024 + 1;
514 for (int32_t block = 0; block < nBlocks; block++) {
515 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
516 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
517 block_size * recordsize);
518
519 for (int32_t i = 0; i < block_size; i++) {
520 uint32_t objnum = start_objnum + block * 1024 + i;
521 char* pEntry = &buf[i * recordsize];
522 if (pEntry[17] == 'f') {
523 m_ObjectInfo[objnum].pos = 0;
524 m_ObjectInfo[objnum].type = 0;
525 } else {
526 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
527 if (offset == 0) {
528 for (int32_t c = 0; c < 10; c++) {
529 if (!std::isdigit(pEntry[c]))
530 return false;
531 }
532 }
533
534 m_ObjectInfo[objnum].pos = offset;
535 int32_t version = FXSYS_atoi(pEntry + 11);
536 if (version >= 1)
537 m_bVersionUpdated = true;
538
539 m_ObjectInfo[objnum].gennum = version;
540 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
541 m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
542
543 m_ObjectInfo[objnum].type = 1;
544 }
545 }
546 }
547 }
548 m_pSyntax->RestorePos(SavedPos + count * recordsize);
549 }
550 return !streampos || LoadCrossRefV5(&streampos, false);
551 }
552
LoadAllCrossRefV5(FX_FILESIZE xrefpos)553 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
554 if (!LoadCrossRefV5(&xrefpos, true))
555 return false;
556
557 std::set<FX_FILESIZE> seen_xrefpos;
558 while (xrefpos) {
559 seen_xrefpos.insert(xrefpos);
560 if (!LoadCrossRefV5(&xrefpos, false))
561 return false;
562
563 // Check for circular references.
564 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
565 return false;
566 }
567 m_ObjectStreamMap.clear();
568 m_bXRefStream = true;
569 return true;
570 }
571
RebuildCrossRef()572 bool CPDF_Parser::RebuildCrossRef() {
573 m_ObjectInfo.clear();
574 m_SortedOffset.clear();
575 m_pTrailer.reset();
576
577 ParserState state = ParserState::kDefault;
578 int32_t inside_index = 0;
579 uint32_t objnum = 0;
580 uint32_t gennum = 0;
581 int32_t depth = 0;
582 const uint32_t kBufferSize = 4096;
583 std::vector<uint8_t> buffer(kBufferSize);
584
585 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
586 FX_FILESIZE start_pos = 0;
587 FX_FILESIZE start_pos1 = 0;
588 FX_FILESIZE last_obj = -1;
589 FX_FILESIZE last_xref = -1;
590 FX_FILESIZE last_trailer = -1;
591
592 while (pos < m_pSyntax->m_FileLen) {
593 const FX_FILESIZE saved_pos = pos;
594 bool bOverFlow = false;
595 uint32_t size =
596 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
597 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
598 break;
599
600 for (uint32_t i = 0; i < size; i++) {
601 uint8_t byte = buffer[i];
602 switch (state) {
603 case ParserState::kDefault:
604 if (PDFCharIsWhitespace(byte)) {
605 state = ParserState::kWhitespace;
606 } else if (std::isdigit(byte)) {
607 --i;
608 state = ParserState::kWhitespace;
609 } else if (byte == '%') {
610 inside_index = 0;
611 state = ParserState::kComment;
612 } else if (byte == '(') {
613 state = ParserState::kString;
614 depth = 1;
615 } else if (byte == '<') {
616 inside_index = 1;
617 state = ParserState::kHexString;
618 } else if (byte == '\\') {
619 state = ParserState::kEscapedString;
620 } else if (byte == 't') {
621 state = ParserState::kTrailer;
622 inside_index = 1;
623 }
624 break;
625
626 case ParserState::kWhitespace:
627 if (std::isdigit(byte)) {
628 start_pos = pos + i;
629 state = ParserState::kObjNum;
630 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
631 } else if (byte == 't') {
632 state = ParserState::kTrailer;
633 inside_index = 1;
634 } else if (byte == 'x') {
635 state = ParserState::kXref;
636 inside_index = 1;
637 } else if (!PDFCharIsWhitespace(byte)) {
638 --i;
639 state = ParserState::kDefault;
640 }
641 break;
642
643 case ParserState::kObjNum:
644 if (std::isdigit(byte)) {
645 objnum =
646 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
647 } else if (PDFCharIsWhitespace(byte)) {
648 state = ParserState::kPostObjNum;
649 } else {
650 --i;
651 state = ParserState::kEndObj;
652 inside_index = 0;
653 }
654 break;
655
656 case ParserState::kPostObjNum:
657 if (std::isdigit(byte)) {
658 start_pos1 = pos + i;
659 state = ParserState::kGenNum;
660 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
661 } else if (byte == 't') {
662 state = ParserState::kTrailer;
663 inside_index = 1;
664 } else if (!PDFCharIsWhitespace(byte)) {
665 --i;
666 state = ParserState::kDefault;
667 }
668 break;
669
670 case ParserState::kGenNum:
671 if (std::isdigit(byte)) {
672 gennum =
673 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
674 } else if (PDFCharIsWhitespace(byte)) {
675 state = ParserState::kPostGenNum;
676 } else {
677 --i;
678 state = ParserState::kDefault;
679 }
680 break;
681
682 case ParserState::kPostGenNum:
683 if (byte == 'o') {
684 state = ParserState::kBeginObj;
685 inside_index = 1;
686 } else if (std::isdigit(byte)) {
687 objnum = gennum;
688 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
689 start_pos = start_pos1;
690 start_pos1 = pos + i;
691 state = ParserState::kGenNum;
692 } else if (byte == 't') {
693 state = ParserState::kTrailer;
694 inside_index = 1;
695 } else if (!PDFCharIsWhitespace(byte)) {
696 --i;
697 state = ParserState::kDefault;
698 }
699 break;
700
701 case ParserState::kBeginObj:
702 switch (inside_index) {
703 case 1:
704 if (byte != 'b') {
705 --i;
706 state = ParserState::kDefault;
707 } else {
708 inside_index++;
709 }
710 break;
711 case 2:
712 if (byte != 'j') {
713 --i;
714 state = ParserState::kDefault;
715 } else {
716 inside_index++;
717 }
718 break;
719 case 3:
720 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
721 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
722 m_SortedOffset.insert(obj_pos);
723 last_obj = start_pos;
724 FX_FILESIZE obj_end = 0;
725 std::unique_ptr<CPDF_Object> pObject =
726 ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum,
727 &obj_end);
728 if (CPDF_Stream* pStream = ToStream(pObject.get())) {
729 if (CPDF_Dictionary* pDict = pStream->GetDict()) {
730 if ((pDict->KeyExist("Type")) &&
731 (pDict->GetStringFor("Type") == "XRef" &&
732 pDict->KeyExist("Size"))) {
733 CPDF_Object* pRoot = pDict->GetObjectFor("Root");
734 if (pRoot && pRoot->GetDict() &&
735 pRoot->GetDict()->GetObjectFor("Pages")) {
736 m_pTrailer = ToDictionary(pDict->Clone());
737 }
738 }
739 }
740 }
741
742 FX_FILESIZE offset = 0;
743 m_pSyntax->RestorePos(obj_pos);
744 offset = m_pSyntax->FindTag("obj", 0);
745 if (offset == -1)
746 offset = 0;
747 else
748 offset += 3;
749
750 FX_FILESIZE nLen = obj_end - obj_pos - offset;
751 if ((uint32_t)nLen > size - i) {
752 pos = obj_end + m_pSyntax->m_HeaderOffset;
753 bOverFlow = true;
754 } else {
755 i += (uint32_t)nLen;
756 }
757
758 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
759 m_ObjectInfo[objnum].pos) {
760 if (pObject) {
761 uint32_t oldgen = GetObjectGenNum(objnum);
762 m_ObjectInfo[objnum].pos = obj_pos;
763 m_ObjectInfo[objnum].gennum = gennum;
764 if (oldgen != gennum)
765 m_bVersionUpdated = true;
766 }
767 } else {
768 m_ObjectInfo[objnum].pos = obj_pos;
769 m_ObjectInfo[objnum].type = 1;
770 m_ObjectInfo[objnum].gennum = gennum;
771 }
772 }
773 --i;
774 state = ParserState::kDefault;
775 break;
776 }
777 break;
778
779 case ParserState::kTrailer:
780 if (inside_index == 7) {
781 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
782 last_trailer = pos + i - 7;
783 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
784
785 std::unique_ptr<CPDF_Object> pObj =
786 m_pSyntax->GetObject(m_pDocument, 0, 0, true);
787 if (pObj) {
788 if (pObj->IsDictionary() || pObj->AsStream()) {
789 CPDF_Stream* pStream = pObj->AsStream();
790 if (CPDF_Dictionary* pTrailer =
791 pStream ? pStream->GetDict() : pObj->AsDictionary()) {
792 if (m_pTrailer) {
793 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
794 CPDF_Reference* pRef = ToReference(pRoot);
795 if (!pRoot ||
796 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
797 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
798 auto it = pTrailer->begin();
799 while (it != pTrailer->end()) {
800 const CFX_ByteString& key = it->first;
801 CPDF_Object* pElement = it->second.get();
802 ++it;
803 uint32_t dwObjNum =
804 pElement ? pElement->GetObjNum() : 0;
805 if (dwObjNum) {
806 m_pTrailer->SetNewFor<CPDF_Reference>(
807 key, m_pDocument, dwObjNum);
808 } else {
809 m_pTrailer->SetFor(key, pElement->Clone());
810 }
811 }
812 }
813 } else {
814 if (pObj->IsStream()) {
815 m_pTrailer = ToDictionary(pTrailer->Clone());
816 } else {
817 m_pTrailer = ToDictionary(std::move(pObj));
818 }
819
820 FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
821 CFX_ByteString strWord = m_pSyntax->GetKeyword();
822 if (!strWord.Compare("startxref")) {
823 bool bNumber;
824 CFX_ByteString bsOffset =
825 m_pSyntax->GetNextWord(&bNumber);
826 if (bNumber)
827 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
828 }
829 m_pSyntax->RestorePos(dwSavePos);
830 }
831 }
832 }
833 }
834 }
835 --i;
836 state = ParserState::kDefault;
837 } else if (byte == "trailer"[inside_index]) {
838 inside_index++;
839 } else {
840 --i;
841 state = ParserState::kDefault;
842 }
843 break;
844
845 case ParserState::kXref:
846 if (inside_index == 4) {
847 last_xref = pos + i - 4;
848 state = ParserState::kWhitespace;
849 } else if (byte == "xref"[inside_index]) {
850 inside_index++;
851 } else {
852 --i;
853 state = ParserState::kDefault;
854 }
855 break;
856
857 case ParserState::kComment:
858 if (PDFCharIsLineEnding(byte))
859 state = ParserState::kDefault;
860 break;
861
862 case ParserState::kString:
863 if (byte == ')') {
864 if (depth > 0)
865 depth--;
866 } else if (byte == '(') {
867 depth++;
868 }
869
870 if (!depth)
871 state = ParserState::kDefault;
872 break;
873
874 case ParserState::kHexString:
875 if (byte == '>' || (byte == '<' && inside_index == 1))
876 state = ParserState::kDefault;
877 inside_index = 0;
878 break;
879
880 case ParserState::kEscapedString:
881 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
882 --i;
883 state = ParserState::kDefault;
884 }
885 break;
886
887 case ParserState::kEndObj:
888 if (PDFCharIsWhitespace(byte)) {
889 state = ParserState::kDefault;
890 } else if (byte == '%' || byte == '(' || byte == '<' ||
891 byte == '\\') {
892 state = ParserState::kDefault;
893 --i;
894 } else if (inside_index == 6) {
895 state = ParserState::kDefault;
896 --i;
897 } else if (byte == "endobj"[inside_index]) {
898 inside_index++;
899 }
900 break;
901 }
902
903 if (bOverFlow) {
904 size = 0;
905 break;
906 }
907 }
908 pos += size;
909
910 // If the position has not changed at all or went backwards in a loop
911 // iteration, then break out to prevent infinite looping.
912 if (pos <= saved_pos)
913 break;
914 }
915
916 if (last_xref != -1 && last_xref > last_obj)
917 last_trailer = last_xref;
918 else if (last_trailer == -1 || last_xref < last_obj)
919 last_trailer = m_pSyntax->m_FileLen;
920
921 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
922 return m_pTrailer && !m_ObjectInfo.empty();
923 }
924
// Loads the cross-reference stream object located at |*pos|.
//
// On return |*pos| holds the stream's "Prev" value, so a caller can follow
// the chain. When |bMainXRef| is true the stream's dictionary becomes
// |m_pTrailer| and the object map is trimmed to the stream's "Size" with
// every entry's type reset to 0; otherwise the dictionary is appended to
// |m_Trailers|.
//
// Returns false when the object cannot be parsed, has object number 0, is
// the document's root, cannot be placed in the document, is not a stream,
// has a negative "Size", lacks a usable "W" array, or contains an entry
// pointing at an invalid object number.
bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
  std::unique_ptr<CPDF_Object> pObject(
      ParseIndirectObjectAt(m_pDocument, *pos, 0));
  if (!pObject)
    return false;

  uint32_t objnum = pObject->m_ObjNum;
  if (!objnum)
    return false;

  // Keep a raw pointer: ownership may be handed to the document just below,
  // but the stream is still read afterwards.
  CPDF_Object* pUnownedObject = pObject.get();
  if (m_pDocument) {
    CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
    if (pRootDict && pRootDict->GetObjNum() == objnum)
      return false;
    if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
            objnum, std::move(pObject))) {
      return false;
    }
  }

  CPDF_Stream* pStream = pUnownedObject->AsStream();
  if (!pStream)
    return false;

  CPDF_Dictionary* pDict = pStream->GetDict();
  *pos = pDict->GetIntegerFor("Prev");
  int32_t size = pDict->GetIntegerFor("Size");
  if (size < 0)
    return false;

  std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
  if (bMainXRef) {
    m_pTrailer = std::move(pNewTrailer);
    ShrinkObjectMap(size);
    for (auto& it : m_ObjectInfo)
      it.second.type = 0;
  } else {
    m_Trailers.push_back(std::move(pNewTrailer));
  }

  // "Index" is read as (first object number, count) pairs. Pairs that are
  // not both numbers, have a negative start or a non-positive count are
  // ignored.
  std::vector<std::pair<int32_t, int32_t>> arrIndex;
  CPDF_Array* pArray = pDict->GetArrayFor("Index");
  if (pArray) {
    for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
      CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
      CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);

      if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
        int nStartNum = pStartNumObj->GetInteger();
        int nCount = pCountObj->GetInteger();
        if (nStartNum >= 0 && nCount > 0)
          arrIndex.push_back(std::make_pair(nStartNum, nCount));
      }
    }
  }

  // Without any usable pair, assume a single section covering [0, size).
  if (arrIndex.size() == 0)
    arrIndex.push_back(std::make_pair(0, size));

  // "W" gives the byte width of each field in an entry; at least three
  // widths are required and their sum must not overflow.
  pArray = pDict->GetArrayFor("W");
  if (!pArray)
    return false;

  std::vector<uint32_t> WidthArray;
  FX_SAFE_UINT32 dwAccWidth = 0;
  for (size_t i = 0; i < pArray->GetCount(); ++i) {
    WidthArray.push_back(pArray->GetIntegerAt(i));
    dwAccWidth += WidthArray[i];
  }

  if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
    return false;

  uint32_t totalWidth = dwAccWidth.ValueOrDie();
  CPDF_StreamAcc acc;
  acc.LoadAllData(pStream);

  const uint8_t* pData = acc.GetData();
  uint32_t dwTotalSize = acc.GetSize();
  // Number of entries consumed from the stream data by earlier sections.
  uint32_t segindex = 0;
  for (uint32_t i = 0; i < arrIndex.size(); i++) {
    int32_t startnum = arrIndex[i].first;
    if (startnum < 0)
      continue;

    m_dwXrefStartObjNum = pdfium::base::checked_cast<uint32_t>(startnum);
    uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
    // Skip sections whose entries would run past the end of the stream data.
    FX_SAFE_UINT32 dwCaculatedSize = segindex;
    dwCaculatedSize += count;
    dwCaculatedSize *= totalWidth;
    if (!dwCaculatedSize.IsValid() ||
        dwCaculatedSize.ValueOrDie() > dwTotalSize) {
      continue;
    }

    // Skip sections that describe object numbers beyond the current map.
    const uint8_t* segstart = pData + segindex * totalWidth;
    FX_SAFE_UINT32 dwMaxObjNum = startnum;
    dwMaxObjNum += count;
    uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
    if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
      continue;

    for (uint32_t j = 0; j < count; j++) {
      // The type defaults to 1 when the first field has zero width.
      int32_t type = 1;
      const uint8_t* entrystart = segstart + j * totalWidth;
      if (WidthArray[0])
        type = GetVarInt(entrystart, WidthArray[0]);

      // An object already marked 255 keeps that type; only its position is
      // filled in from the second field.
      if (GetObjectType(startnum + j) == 255) {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        m_SortedOffset.insert(offset);
        continue;
      }

      // An object that already has a non-zero type is left as it is.
      if (GetObjectType(startnum + j))
        continue;

      m_ObjectInfo[startnum + j].type = type;
      if (type == 0) {
        m_ObjectInfo[startnum + j].pos = 0;
      } else {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        if (type == 1) {
          m_SortedOffset.insert(offset);
        } else {
          // For other types the second field is used as an object number;
          // that object is marked with type 255.
          if (offset < 0 || !IsValidObjectNumber(offset))
            return false;
          m_ObjectInfo[offset].type = 255;
        }
      }
    }
    segindex += count;
  }
  return true;
}
1065
GetIDArray()1066 CPDF_Array* CPDF_Parser::GetIDArray() {
1067 if (!m_pTrailer)
1068 return nullptr;
1069
1070 CPDF_Object* pID = m_pTrailer->GetObjectFor("ID");
1071 if (!pID)
1072 return nullptr;
1073
1074 CPDF_Reference* pRef = pID->AsReference();
1075 if (!pRef)
1076 return ToArray(pID);
1077
1078 std::unique_ptr<CPDF_Object> pNewObj =
1079 ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1080 pID = pNewObj.get();
1081 m_pTrailer->SetFor("ID", std::move(pNewObj));
1082 return ToArray(pID);
1083 }
1084
GetRootObjNum()1085 uint32_t CPDF_Parser::GetRootObjNum() {
1086 CPDF_Reference* pRef =
1087 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr);
1088 return pRef ? pRef->GetRefObjNum() : 0;
1089 }
1090
GetInfoObjNum()1091 uint32_t CPDF_Parser::GetInfoObjNum() {
1092 CPDF_Reference* pRef =
1093 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr);
1094 return pRef ? pRef->GetRefObjNum() : 0;
1095 }
1096
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum)1097 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
1098 CPDF_IndirectObjectHolder* pObjList,
1099 uint32_t objnum) {
1100 if (!IsValidObjectNumber(objnum))
1101 return nullptr;
1102
1103 // Prevent circular parsing the same object.
1104 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1105 return nullptr;
1106
1107 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1108 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
1109 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1110 if (pos <= 0)
1111 return nullptr;
1112 return ParseIndirectObjectAt(pObjList, pos, objnum);
1113 }
1114 if (GetObjectType(objnum) != 2)
1115 return nullptr;
1116
1117 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1118 if (!pObjStream)
1119 return nullptr;
1120
1121 CFX_RetainPtr<IFX_MemoryStream> file = IFX_MemoryStream::Create(
1122 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), false);
1123 CPDF_SyntaxParser syntax;
1124 syntax.InitParser(file, 0);
1125 const int32_t offset = GetStreamFirst(pObjStream);
1126
1127 // Read object numbers from |pObjStream| into a cache.
1128 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1129 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1130 uint32_t thisnum = syntax.GetDirectNum();
1131 uint32_t thisoff = syntax.GetDirectNum();
1132 m_ObjCache[pObjStream][thisnum] = thisoff;
1133 }
1134 }
1135
1136 const auto it = m_ObjCache[pObjStream].find(objnum);
1137 if (it == m_ObjCache[pObjStream].end())
1138 return nullptr;
1139
1140 syntax.RestorePos(offset + it->second);
1141 return syntax.GetObject(pObjList, 0, 0, true);
1142 }
1143
GetObjectStream(uint32_t objnum)1144 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) {
1145 auto it = m_ObjectStreamMap.find(objnum);
1146 if (it != m_ObjectStreamMap.end())
1147 return it->second.get();
1148
1149 if (!m_pDocument)
1150 return nullptr;
1151
1152 const CPDF_Stream* pStream =
1153 ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
1154 if (!pStream)
1155 return nullptr;
1156
1157 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
1158 pStreamAcc->LoadAllData(pStream);
1159 m_ObjectStreamMap[objnum].reset(pStreamAcc);
1160 return pStreamAcc;
1161 }
1162
GetObjectSize(uint32_t objnum) const1163 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const {
1164 if (!IsValidObjectNumber(objnum))
1165 return 0;
1166
1167 if (GetObjectType(objnum) == 2)
1168 objnum = GetObjectPositionOrZero(objnum);
1169
1170 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
1171 return 0;
1172
1173 FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1174 if (offset == 0)
1175 return 0;
1176
1177 auto it = m_SortedOffset.find(offset);
1178 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
1179 return 0;
1180
1181 return *it - offset;
1182 }
1183
GetIndirectBinary(uint32_t objnum,uint8_t * & pBuffer,uint32_t & size)1184 void CPDF_Parser::GetIndirectBinary(uint32_t objnum,
1185 uint8_t*& pBuffer,
1186 uint32_t& size) {
1187 pBuffer = nullptr;
1188 size = 0;
1189 if (!IsValidObjectNumber(objnum))
1190 return;
1191
1192 if (GetObjectType(objnum) == 2) {
1193 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1194 if (!pObjStream)
1195 return;
1196
1197 int32_t offset = GetStreamFirst(pObjStream);
1198 const uint8_t* pData = pObjStream->GetData();
1199 uint32_t totalsize = pObjStream->GetSize();
1200 CFX_RetainPtr<IFX_MemoryStream> file =
1201 IFX_MemoryStream::Create((uint8_t*)pData, (size_t)totalsize, false);
1202 CPDF_SyntaxParser syntax;
1203 syntax.InitParser(file, 0);
1204
1205 for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1206 uint32_t thisnum = syntax.GetDirectNum();
1207 uint32_t thisoff = syntax.GetDirectNum();
1208 if (thisnum != objnum)
1209 continue;
1210
1211 if (i == 1) {
1212 size = totalsize - (thisoff + offset);
1213 } else {
1214 syntax.GetDirectNum(); // Skip nextnum.
1215 uint32_t nextoff = syntax.GetDirectNum();
1216 size = nextoff - thisoff;
1217 }
1218
1219 pBuffer = FX_Alloc(uint8_t, size);
1220 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1221 return;
1222 }
1223 return;
1224 }
1225
1226 if (GetObjectType(objnum) != 1)
1227 return;
1228
1229 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1230 if (pos == 0)
1231 return;
1232
1233 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1234 m_pSyntax->RestorePos(pos);
1235
1236 bool bIsNumber;
1237 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1238 if (!bIsNumber) {
1239 m_pSyntax->RestorePos(SavedPos);
1240 return;
1241 }
1242
1243 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1244 if (parser_objnum && parser_objnum != objnum) {
1245 m_pSyntax->RestorePos(SavedPos);
1246 return;
1247 }
1248
1249 word = m_pSyntax->GetNextWord(&bIsNumber);
1250 if (!bIsNumber) {
1251 m_pSyntax->RestorePos(SavedPos);
1252 return;
1253 }
1254
1255 if (m_pSyntax->GetKeyword() != "obj") {
1256 m_pSyntax->RestorePos(SavedPos);
1257 return;
1258 }
1259
1260 auto it = m_SortedOffset.find(pos);
1261 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
1262 m_pSyntax->RestorePos(SavedPos);
1263 return;
1264 }
1265
1266 FX_FILESIZE nextoff = *it;
1267 bool bNextOffValid = false;
1268 if (nextoff != pos) {
1269 m_pSyntax->RestorePos(nextoff);
1270 word = m_pSyntax->GetNextWord(&bIsNumber);
1271 if (word == "xref") {
1272 bNextOffValid = true;
1273 } else if (bIsNumber) {
1274 word = m_pSyntax->GetNextWord(&bIsNumber);
1275 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
1276 bNextOffValid = true;
1277 }
1278 }
1279 }
1280
1281 if (!bNextOffValid) {
1282 m_pSyntax->RestorePos(pos);
1283 while (1) {
1284 if (m_pSyntax->GetKeyword() == "endobj")
1285 break;
1286
1287 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
1288 break;
1289 }
1290 nextoff = m_pSyntax->SavePos();
1291 }
1292
1293 size = (uint32_t)(nextoff - pos);
1294 pBuffer = FX_Alloc(uint8_t, size);
1295 m_pSyntax->RestorePos(pos);
1296 m_pSyntax->ReadBlock(pBuffer, size);
1297 m_pSyntax->RestorePos(SavedPos);
1298 }
1299
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum)1300 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
1301 CPDF_IndirectObjectHolder* pObjList,
1302 FX_FILESIZE pos,
1303 uint32_t objnum) {
1304 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1305 m_pSyntax->RestorePos(pos);
1306 bool bIsNumber;
1307 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1308 if (!bIsNumber) {
1309 m_pSyntax->RestorePos(SavedPos);
1310 return nullptr;
1311 }
1312
1313 FX_FILESIZE objOffset = m_pSyntax->SavePos();
1314 objOffset -= word.GetLength();
1315 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1316 if (objnum && parser_objnum != objnum) {
1317 m_pSyntax->RestorePos(SavedPos);
1318 return nullptr;
1319 }
1320
1321 word = m_pSyntax->GetNextWord(&bIsNumber);
1322 if (!bIsNumber) {
1323 m_pSyntax->RestorePos(SavedPos);
1324 return nullptr;
1325 }
1326
1327 uint32_t parser_gennum = FXSYS_atoui(word.c_str());
1328 if (m_pSyntax->GetKeyword() != "obj") {
1329 m_pSyntax->RestorePos(SavedPos);
1330 return nullptr;
1331 }
1332
1333 std::unique_ptr<CPDF_Object> pObj =
1334 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
1335 m_pSyntax->SavePos();
1336
1337 CFX_ByteString bsWord = m_pSyntax->GetKeyword();
1338 if (bsWord == "endobj")
1339 m_pSyntax->SavePos();
1340
1341 m_pSyntax->RestorePos(SavedPos);
1342 if (pObj) {
1343 if (!objnum)
1344 pObj->m_ObjNum = parser_objnum;
1345 pObj->m_GenNum = parser_gennum;
1346 }
1347 return pObj;
1348 }
1349
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,FX_FILESIZE * pResultPos)1350 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
1351 CPDF_IndirectObjectHolder* pObjList,
1352 FX_FILESIZE pos,
1353 uint32_t objnum,
1354 FX_FILESIZE* pResultPos) {
1355 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1356 m_pSyntax->RestorePos(pos);
1357
1358 bool bIsNumber;
1359 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1360 if (!bIsNumber) {
1361 m_pSyntax->RestorePos(SavedPos);
1362 return nullptr;
1363 }
1364
1365 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
1366 if (objnum && parser_objnum != objnum) {
1367 m_pSyntax->RestorePos(SavedPos);
1368 return nullptr;
1369 }
1370
1371 word = m_pSyntax->GetNextWord(&bIsNumber);
1372 if (!bIsNumber) {
1373 m_pSyntax->RestorePos(SavedPos);
1374 return nullptr;
1375 }
1376
1377 uint32_t gennum = FXSYS_atoui(word.c_str());
1378 if (m_pSyntax->GetKeyword() != "obj") {
1379 m_pSyntax->RestorePos(SavedPos);
1380 return nullptr;
1381 }
1382
1383 std::unique_ptr<CPDF_Object> pObj =
1384 m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum);
1385
1386 if (pResultPos)
1387 *pResultPos = m_pSyntax->m_Pos;
1388
1389 m_pSyntax->RestorePos(SavedPos);
1390 return pObj;
1391 }
1392
GetFirstPageNo() const1393 uint32_t CPDF_Parser::GetFirstPageNo() const {
1394 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1395 }
1396
LoadTrailerV4()1397 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1398 if (m_pSyntax->GetKeyword() != "trailer")
1399 return nullptr;
1400
1401 return ToDictionary(m_pSyntax->GetObject(m_pDocument, 0, 0, true));
1402 }
1403
GetPermissions() const1404 uint32_t CPDF_Parser::GetPermissions() const {
1405 if (!m_pSecurityHandler)
1406 return 0xFFFFFFFF;
1407
1408 uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
1409 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
1410 // See PDF Reference 1.7, page 123, table 3.20.
1411 dwPermission &= 0xFFFFFFFC;
1412 dwPermission |= 0xFFFFF0C0;
1413 }
1414 return dwPermission;
1415 }
1416
IsLinearizedFile(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t offset)1417 bool CPDF_Parser::IsLinearizedFile(
1418 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
1419 uint32_t offset) {
1420 m_pSyntax->InitParser(pFileAccess, offset);
1421 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
1422
1423 FX_FILESIZE SavedPos = m_pSyntax->SavePos();
1424 bool bIsNumber;
1425 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
1426 if (!bIsNumber)
1427 return false;
1428
1429 uint32_t objnum = FXSYS_atoui(word.c_str());
1430 word = m_pSyntax->GetNextWord(&bIsNumber);
1431 if (!bIsNumber)
1432 return false;
1433
1434 uint32_t gennum = FXSYS_atoui(word.c_str());
1435 if (m_pSyntax->GetKeyword() != "obj") {
1436 m_pSyntax->RestorePos(SavedPos);
1437 return false;
1438 }
1439
1440 m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
1441 m_pSyntax->GetObject(nullptr, objnum, gennum, true));
1442 if (!m_pLinearized)
1443 return false;
1444
1445 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1446 // Move parser onto first page xref table start.
1447 m_pSyntax->GetNextWord(nullptr);
1448 return true;
1449 }
1450
StartLinearizedParse(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)1451 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1452 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
1453 CPDF_Document* pDocument) {
1454 ASSERT(!m_bHasParsed);
1455 m_bXRefStream = false;
1456 m_LastXRefOffset = 0;
1457
1458 int32_t offset = GetHeaderOffset(pFileAccess);
1459 if (offset == -1)
1460 return FORMAT_ERROR;
1461
1462 if (!IsLinearizedFile(pFileAccess, offset)) {
1463 m_pSyntax->m_pFileAccess = nullptr;
1464 return StartParse(pFileAccess, std::move(pDocument));
1465 }
1466 m_bHasParsed = true;
1467 m_pDocument = pDocument;
1468
1469 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
1470 bool bXRefRebuilt = false;
1471 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, false);
1472 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1473 if (!RebuildCrossRef())
1474 return FORMAT_ERROR;
1475
1476 bXRefRebuilt = true;
1477 m_LastXRefOffset = 0;
1478 }
1479
1480 if (bLoadV4) {
1481 m_pTrailer = LoadTrailerV4();
1482 if (!m_pTrailer)
1483 return SUCCESS;
1484
1485 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
1486 if (xrefsize > 0)
1487 ShrinkObjectMap(xrefsize);
1488 }
1489
1490 Error eRet = SetEncryptHandler();
1491 if (eRet != SUCCESS)
1492 return eRet;
1493
1494 m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1495 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1496 if (bXRefRebuilt)
1497 return FORMAT_ERROR;
1498
1499 ReleaseEncryptHandler();
1500 if (!RebuildCrossRef())
1501 return FORMAT_ERROR;
1502
1503 eRet = SetEncryptHandler();
1504 if (eRet != SUCCESS)
1505 return eRet;
1506
1507 m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1508 if (!m_pDocument->GetRoot())
1509 return FORMAT_ERROR;
1510 }
1511
1512 if (GetRootObjNum() == 0) {
1513 ReleaseEncryptHandler();
1514 if (!RebuildCrossRef() || GetRootObjNum() == 0)
1515 return FORMAT_ERROR;
1516
1517 eRet = SetEncryptHandler();
1518 if (eRet != SUCCESS)
1519 return eRet;
1520 }
1521
1522 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1523 if (CPDF_Reference* pMetadata =
1524 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
1525 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
1526 }
1527 return SUCCESS;
1528 }
1529
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1530 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1531 if (!LoadCrossRefV5(&xrefpos, false))
1532 return false;
1533
1534 std::set<FX_FILESIZE> seen_xrefpos;
1535 while (xrefpos) {
1536 seen_xrefpos.insert(xrefpos);
1537 if (!LoadCrossRefV5(&xrefpos, false))
1538 return false;
1539
1540 // Check for circular references.
1541 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1542 return false;
1543 }
1544 m_ObjectStreamMap.clear();
1545 m_bXRefStream = true;
1546 return true;
1547 }
1548
LoadLinearizedMainXRefTable()1549 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1550 uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
1551 m_pSyntax->m_MetadataObjnum = 0;
1552 m_pTrailer.reset();
1553 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
1554
1555 uint8_t ch = 0;
1556 uint32_t dwCount = 0;
1557 m_pSyntax->GetNextChar(ch);
1558 while (PDFCharIsWhitespace(ch)) {
1559 ++dwCount;
1560 if (m_pSyntax->m_FileLen <=
1561 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
1562 break;
1563 }
1564 m_pSyntax->GetNextChar(ch);
1565 }
1566 m_LastXRefOffset += dwCount;
1567 m_ObjectStreamMap.clear();
1568 m_ObjCache.clear();
1569
1570 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1571 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1572 m_LastXRefOffset = 0;
1573 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1574 return FORMAT_ERROR;
1575 }
1576
1577 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
1578 return SUCCESS;
1579 }
1580