1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_reference.h"
20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
25 #include "core/fxcrt/autorestorer.h"
26 #include "core/fxcrt/cfx_memorystream.h"
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_safe_types.h"
29 #include "third_party/base/ptr_util.h"
30 #include "third_party/base/stl_util.h"
31
32 namespace {
33
// Upper bound on the number of cross-reference entries this parser accepts.
// The theoretical limit is higher, but this should be large enough in
// practice.
constexpr int32_t kMaxXRefSize = 1 << 20;
37
// Minimum number of bytes that must be available from the header offset
// onwards for InitSyntaxParser() to accept a file. Presumably the length of a
// "%PDF-x.y" header plus one line-ending byte - TODO confirm.
constexpr FX_FILESIZE kPDFHeaderSize = 9;
39
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer. Bytes
// shifted beyond 32 bits are discarded; a non-positive |n| yields 0.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  int32_t index = 0;
  while (index < n) {
    value = (value << 8) | p[index];
    ++index;
  }
  return value;
}
46
GetStreamNCount(const RetainPtr<CPDF_StreamAcc> & pObjStream)47 int32_t GetStreamNCount(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
48 return pObjStream->GetDict()->GetIntegerFor("N");
49 }
50
GetStreamFirst(const RetainPtr<CPDF_StreamAcc> & pObjStream)51 int32_t GetStreamFirst(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
52 return pObjStream->GetDict()->GetIntegerFor("First");
53 }
54
55 } // namespace
56
57 class CPDF_Parser::TrailerData {
58 public:
TrailerData()59 TrailerData() {}
~TrailerData()60 ~TrailerData() {}
61
GetMainTrailer() const62 CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); }
63
GetCombinedTrailer() const64 std::unique_ptr<CPDF_Dictionary> GetCombinedTrailer() const {
65 std::unique_ptr<CPDF_Dictionary> result =
66 ToDictionary(main_trailer_->Clone());
67
68 // Info is optional.
69 uint32_t info_obj_num = GetInfoObjNum();
70 if (info_obj_num > 0)
71 result->SetNewFor<CPDF_Reference>("Info", nullptr, GetInfoObjNum());
72
73 // Root is required.
74 result->SetNewFor<CPDF_Reference>("Root", nullptr, GetRootObjNum());
75 return result;
76 }
77
SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer)78 void SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
79 ASSERT(trailer);
80 main_trailer_ = std::move(trailer);
81 ApplyTrailer(main_trailer_.get());
82 }
83
AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer)84 void AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
85 ASSERT(trailer);
86 ApplyTrailer(trailer.get());
87 }
88
Clear()89 void Clear() {
90 main_trailer_.reset();
91 last_info_obj_num_ = 0;
92 last_root_obj_num_ = 0;
93 }
94
GetInfoObjNum() const95 uint32_t GetInfoObjNum() const {
96 const CPDF_Reference* pRef = ToReference(
97 GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Info") : nullptr);
98 return pRef ? pRef->GetRefObjNum() : last_info_obj_num_;
99 }
100
GetRootObjNum() const101 uint32_t GetRootObjNum() const {
102 const CPDF_Reference* pRef = ToReference(
103 GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Root") : nullptr);
104 return pRef ? pRef->GetRefObjNum() : last_root_obj_num_;
105 }
106
107 private:
ApplyTrailer(const CPDF_Dictionary * dict)108 void ApplyTrailer(const CPDF_Dictionary* dict) {
109 // The most recent Info object number contained in last added trailer.
110 // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
111 const auto* pRef = ToReference(dict->GetObjectFor("Info"));
112 if (pRef)
113 last_info_obj_num_ = pRef->GetRefObjNum();
114
115 const auto* pRoot = ToReference(dict->GetObjectFor("Root"));
116 if (pRoot)
117 last_root_obj_num_ = pRoot->GetRefObjNum();
118 }
119
120 std::unique_ptr<CPDF_Dictionary> main_trailer_;
121 uint32_t last_info_obj_num_ = 0;
122 uint32_t last_root_obj_num_ = 0;
123 };
124
// Creates a parser that owns a fresh CPDF_SyntaxParser and an empty
// TrailerData. The constructor itself performs no parsing.
CPDF_Parser::CPDF_Parser()
    : m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()),
      m_bHasParsed(false),
      m_bXRefStream(false),
      m_FileVersion(0),
      m_TrailerData(pdfium::MakeUnique<TrailerData>()) {}
131
// Releases the security handler and clears the encryption dictionary pointer
// before the remaining members are destroyed.
CPDF_Parser::~CPDF_Parser() {
  ReleaseEncryptHandler();
}
135
GetLastObjNum() const136 uint32_t CPDF_Parser::GetLastObjNum() const {
137 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
138 }
139
IsValidObjectNumber(uint32_t objnum) const140 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
141 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
142 }
143
GetObjectPositionOrZero(uint32_t objnum) const144 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
145 auto it = m_ObjectInfo.find(objnum);
146 return it != m_ObjectInfo.end() ? it->second.pos : 0;
147 }
148
GetObjectType(uint32_t objnum) const149 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
150 ASSERT(IsValidObjectNumber(objnum));
151 auto it = m_ObjectInfo.find(objnum);
152 return it != m_ObjectInfo.end() ? it->second.type : ObjectType::kFree;
153 }
154
GetObjectGenNum(uint32_t objnum) const155 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
156 ASSERT(IsValidObjectNumber(objnum));
157 auto it = m_ObjectInfo.find(objnum);
158 return it != m_ObjectInfo.end() ? it->second.gennum : 0;
159 }
160
IsObjectFreeOrNull(uint32_t objnum) const161 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
162 switch (GetObjectType(objnum)) {
163 case ObjectType::kFree:
164 case ObjectType::kNull:
165 return true;
166 case ObjectType::kNotCompressed:
167 case ObjectType::kCompressed:
168 return false;
169 }
170 ASSERT(false); // NOTREACHED();
171 return false;
172 }
173
// True when |objnum| is recorded as free. GetObjectType() also reports object
// numbers without an entry as free.
bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
  return GetObjectType(objnum) == ObjectType::kFree;
}
177
// Stores |pDict| as the current encryption dictionary. Passing nullptr clears
// it (see ReleaseEncryptHandler()).
void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
  m_pEncryptDict = pDict;
}
181
// Returns the stream the syntax parser is reading from.
RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
  return m_pSyntax->GetFileAccess();
}
185
ShrinkObjectMap(uint32_t objnum)186 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
187 if (objnum == 0) {
188 m_ObjectInfo.clear();
189 return;
190 }
191
192 auto it = m_ObjectInfo.lower_bound(objnum);
193 while (it != m_ObjectInfo.end()) {
194 auto saved_it = it++;
195 m_ObjectInfo.erase(saved_it);
196 }
197
198 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
199 m_ObjectInfo[objnum - 1].pos = 0;
200 }
201
InitSyntaxParser(const RetainPtr<IFX_SeekableReadStream> & file_access)202 bool CPDF_Parser::InitSyntaxParser(
203 const RetainPtr<IFX_SeekableReadStream>& file_access) {
204 const int32_t header_offset = GetHeaderOffset(file_access);
205 if (header_offset == kInvalidHeaderOffset)
206 return false;
207 if (file_access->GetSize() < header_offset + kPDFHeaderSize)
208 return false;
209
210 m_pSyntax->InitParser(file_access, header_offset);
211 return ParseFileVersion();
212 }
213
ParseFileVersion()214 bool CPDF_Parser::ParseFileVersion() {
215 m_FileVersion = 0;
216 uint8_t ch;
217 if (!m_pSyntax->GetCharAt(5, ch))
218 return false;
219
220 if (std::isdigit(ch))
221 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
222
223 if (!m_pSyntax->GetCharAt(7, ch))
224 return false;
225
226 if (std::isdigit(ch))
227 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
228 return true;
229 }
230
StartParse(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)231 CPDF_Parser::Error CPDF_Parser::StartParse(
232 const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
233 CPDF_Document* pDocument) {
234 if (!InitSyntaxParser(pFileAccess))
235 return FORMAT_ERROR;
236 return StartParseInternal(pDocument);
237 }
238
// Parses the file behind the already-initialized syntax parser into
// |pDocument|. May only be called once per parser (asserted).
//
// Steps:
//   1. Locate "startxref" and try to load the cross-reference data from that
//      offset, first as a table (V4), then as a stream (V5). If neither works,
//      or no offset was found, rebuild the cross-reference data by scanning
//      the file.
//   2. Set up the security handler from the trailer.
//   3. Load the document. If it has no root or no pages and the
//      cross-reference data has not been rebuilt yet, rebuild and retry once.
//   4. If no Root object number is known, rebuild once more.
//   5. When a security handler exists and metadata is not encrypted, remember
//      the object number of the root's "Metadata" reference.
//
// Returns SUCCESS, FORMAT_ERROR, or whatever SetEncryptHandler() reports.
CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) {
  ASSERT(!m_bHasParsed);
  m_bHasParsed = true;
  m_bXRefStream = false;

  m_pDocument = pDocument;

  // Tracks whether RebuildCrossRef() has already been used, so that the
  // fallback in step 3 is not attempted a second time.
  bool bXRefRebuilt = false;

  m_LastXRefOffset = ParseStartXRef();

  if (m_LastXRefOffset > 0) {
    if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
        !LoadAllCrossRefV5(m_LastXRefOffset)) {
      if (!RebuildCrossRef())
        return FORMAT_ERROR;

      bXRefRebuilt = true;
      // Reset after the rebuild; this also overrides any offset the rebuild
      // itself stored in m_LastXRefOffset.
      m_LastXRefOffset = 0;
    }
  } else {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    bXRefRebuilt = true;
  }
  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  m_pDocument->LoadDoc();
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // The loaded cross-reference data did not yield a usable document. If it
    // already came from a rebuild there is nothing left to try.
    if (bXRefRebuilt)
      return FORMAT_ERROR;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;

    m_pDocument->LoadDoc();
    if (!m_pDocument->GetRoot())
      return FORMAT_ERROR;
  }
  if (GetRootObjNum() == 0) {
    // No Root object number is known; rebuild and require one afterwards.
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }
  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    CPDF_Reference* pMetadata =
        ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
    if (pMetadata)
      m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
303
ParseStartXRef()304 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
305 static constexpr char kStartXRefKeyword[] = "startxref";
306 m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset -
307 strlen(kStartXRefKeyword));
308 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
309 return 0;
310
311 // Skip "startxref" keyword.
312 m_pSyntax->GetKeyword();
313
314 // Read XRef offset.
315 bool bNumber;
316 const ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
317 if (!bNumber || xrefpos_str.IsEmpty())
318 return 0;
319
320 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str());
321 if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize())
322 return 0;
323
324 return result.ValueOrDie();
325 }
326
SetEncryptHandler()327 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
328 ReleaseEncryptHandler();
329 if (!GetTrailer())
330 return FORMAT_ERROR;
331
332 CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt");
333 if (pEncryptObj) {
334 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
335 SetEncryptDictionary(pEncryptDict);
336 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
337 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
338 if (pEncryptObj)
339 SetEncryptDictionary(pEncryptObj->GetDict());
340 }
341 }
342
343 if (m_pEncryptDict) {
344 ByteString filter = m_pEncryptDict->GetStringFor("Filter");
345 if (filter != "Standard")
346 return HANDLER_ERROR;
347
348 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler =
349 pdfium::MakeUnique<CPDF_SecurityHandler>();
350 if (!pSecurityHandler->OnInit(m_pEncryptDict.Get(), GetIDArray(),
351 m_Password))
352 return PASSWORD_ERROR;
353
354 m_pSecurityHandler = std::move(pSecurityHandler);
355 }
356 return SUCCESS;
357 }
358
// Destroys the security handler, if any, and then clears the encryption
// dictionary pointer.
void CPDF_Parser::ReleaseEncryptHandler() {
  m_pSecurityHandler.reset();
  SetEncryptDictionary(nullptr);
}
363
GetObjectOffset(uint32_t objnum) const364 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
365 if (!IsValidObjectNumber(objnum))
366 return 0;
367
368 if (GetObjectType(objnum) == ObjectType::kNotCompressed)
369 return GetObjectPositionOrZero(objnum);
370
371 if (GetObjectType(objnum) == ObjectType::kCompressed) {
372 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
373 return GetObjectPositionOrZero(pos);
374 }
375 return 0;
376 }
377
378 // Ideally, all the cross reference entries should be verified.
379 // In reality, we rarely see well-formed cross references don't match
380 // with the objects. crbug/602650 showed a case where object numbers
381 // in the cross reference table are all off by one.
VerifyCrossRefV4()382 bool CPDF_Parser::VerifyCrossRefV4() {
383 for (const auto& it : m_ObjectInfo) {
384 if (it.second.pos == 0)
385 continue;
386 // Find the first non-zero position.
387 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
388 m_pSyntax->SetPos(it.second.pos);
389 bool is_num = false;
390 ByteString num_str = m_pSyntax->GetNextWord(&is_num);
391 m_pSyntax->SetPos(SavedPos);
392 if (!is_num || num_str.IsEmpty() ||
393 FXSYS_atoui(num_str.c_str()) != it.first) {
394 // If the object number read doesn't match the one stored,
395 // something is wrong with the cross reference table.
396 return false;
397 }
398 return true;
399 }
400 return true;
401 }
402
LoadAllCrossRefV4(FX_FILESIZE xrefpos)403 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
404 if (!LoadCrossRefV4(xrefpos, true))
405 return false;
406
407 std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
408 if (!trailer)
409 return false;
410
411 m_TrailerData->SetMainTrailer(std::move(trailer));
412 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
413 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
414 ShrinkObjectMap(xrefsize);
415
416 std::vector<FX_FILESIZE> CrossRefList;
417 std::vector<FX_FILESIZE> XRefStreamList;
418 std::set<FX_FILESIZE> seen_xrefpos;
419
420 CrossRefList.push_back(xrefpos);
421 XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
422 seen_xrefpos.insert(xrefpos);
423
424 // When the trailer doesn't have Prev entry or Prev entry value is not
425 // numerical, GetDirectInteger() returns 0. Loading will end.
426 xrefpos = GetDirectInteger(GetTrailer(), "Prev");
427 while (xrefpos) {
428 // Check for circular references.
429 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
430 return false;
431
432 seen_xrefpos.insert(xrefpos);
433
434 // SLOW ...
435 CrossRefList.insert(CrossRefList.begin(), xrefpos);
436 LoadCrossRefV4(xrefpos, true);
437
438 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
439 if (!pDict)
440 return false;
441
442 xrefpos = GetDirectInteger(pDict.get(), "Prev");
443
444 // SLOW ...
445 XRefStreamList.insert(XRefStreamList.begin(),
446 pDict->GetIntegerFor("XRefStm"));
447 m_TrailerData->AppendTrailer(std::move(pDict));
448 }
449
450 for (size_t i = 0; i < CrossRefList.size(); ++i) {
451 if (!LoadCrossRefV4(CrossRefList[i], false))
452 return false;
453
454 if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
455 return false;
456
457 if (i == 0 && !VerifyCrossRefV4())
458 return false;
459 }
460 return true;
461 }
462
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos)463 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) {
464 if (!LoadCrossRefV4(xrefpos, false))
465 return false;
466
467 std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
468 if (!trailer)
469 return false;
470
471 m_TrailerData->SetMainTrailer(std::move(trailer));
472 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
473 if (xrefsize == 0)
474 return false;
475
476 std::vector<FX_FILESIZE> CrossRefList;
477 std::vector<FX_FILESIZE> XRefStreamList;
478 std::set<FX_FILESIZE> seen_xrefpos;
479
480 CrossRefList.push_back(xrefpos);
481 XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
482 seen_xrefpos.insert(xrefpos);
483
484 xrefpos = GetDirectInteger(GetTrailer(), "Prev");
485 while (xrefpos) {
486 // Check for circular references.
487 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
488 return false;
489
490 seen_xrefpos.insert(xrefpos);
491
492 // SLOW ...
493 CrossRefList.insert(CrossRefList.begin(), xrefpos);
494 LoadCrossRefV4(xrefpos, true);
495
496 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
497 if (!pDict)
498 return false;
499
500 xrefpos = GetDirectInteger(pDict.get(), "Prev");
501
502 // SLOW ...
503 XRefStreamList.insert(XRefStreamList.begin(),
504 pDict->GetIntegerFor("XRefStm"));
505 m_TrailerData->AppendTrailer(std::move(pDict));
506 }
507
508 for (size_t i = 1; i < CrossRefList.size(); ++i) {
509 if (!LoadCrossRefV4(CrossRefList[i], false))
510 return false;
511
512 if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
513 return false;
514 }
515 return true;
516 }
517
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)518 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
519 uint32_t start_objnum,
520 uint32_t count,
521 std::vector<CrossRefObjData>* out_objects) {
522 // Each entry shall be exactly 20 byte.
523 // A sample entry looks like:
524 // "0000000000 00007 f\r\n"
525 static constexpr int32_t kEntryConstSize = 20;
526
527 if (!out_objects) {
528 FX_SAFE_FILESIZE pos = count;
529 pos *= kEntryConstSize;
530 pos += m_pSyntax->GetPos();
531 if (!pos.IsValid())
532 return false;
533 m_pSyntax->SetPos(pos.ValueOrDie());
534 return true;
535 }
536 const size_t start_obj_index = out_objects->size();
537 FX_SAFE_SIZE_T new_size = start_obj_index;
538 new_size += count;
539 if (!new_size.IsValid())
540 return false;
541
542 if (new_size.ValueOrDie() > kMaxXRefSize)
543 return false;
544
545 const size_t max_entries_in_file =
546 m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize;
547 if (new_size.ValueOrDie() > max_entries_in_file)
548 return false;
549
550 out_objects->resize(new_size.ValueOrDie());
551
552 std::vector<char> buf(1024 * kEntryConstSize + 1);
553 buf.back() = '\0';
554
555 int32_t nBlocks = count / 1024 + 1;
556 for (int32_t block = 0; block < nBlocks; block++) {
557 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
558 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
559 block_size * kEntryConstSize)) {
560 return false;
561 }
562
563 for (int32_t i = 0; i < block_size; i++) {
564 CrossRefObjData& obj_data =
565 (*out_objects)[start_obj_index + block * 1024 + i];
566
567 const uint32_t objnum = start_objnum + block * 1024 + i;
568
569 obj_data.obj_num = objnum;
570
571 ObjectInfo& info = obj_data.info;
572
573 char* pEntry = &buf[i * kEntryConstSize];
574 if (pEntry[17] == 'f') {
575 info.pos = 0;
576 info.type = ObjectType::kFree;
577 } else {
578 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
579 if (!offset.IsValid())
580 return false;
581
582 if (offset.ValueOrDie() == 0) {
583 for (int32_t c = 0; c < 10; c++) {
584 if (!std::isdigit(pEntry[c]))
585 return false;
586 }
587 }
588
589 info.pos = offset.ValueOrDie();
590
591 // TODO(art-snake): The info.gennum is uint16_t, but version may be
592 // greated than max<uint16_t>. Needs solve this issue.
593 const int32_t version = FXSYS_atoi(pEntry + 11);
594 info.gennum = version;
595 info.type = ObjectType::kNotCompressed;
596 }
597 }
598 }
599 return true;
600 }
601
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)602 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
603 if (out_objects)
604 out_objects->clear();
605
606 if (m_pSyntax->GetKeyword() != "xref")
607 return false;
608 std::vector<CrossRefObjData> result_objects;
609 while (1) {
610 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
611 bool bIsNumber;
612 ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
613 if (word.IsEmpty()) {
614 return false;
615 }
616
617 if (!bIsNumber) {
618 m_pSyntax->SetPos(SavedPos);
619 break;
620 }
621
622 uint32_t start_objnum = FXSYS_atoui(word.c_str());
623 if (start_objnum >= kMaxObjectNumber)
624 return false;
625
626 uint32_t count = m_pSyntax->GetDirectNum();
627 m_pSyntax->ToNextWord();
628 SavedPos = m_pSyntax->GetPos();
629
630 if (!ParseAndAppendCrossRefSubsectionData(
631 start_objnum, count, out_objects ? &result_objects : nullptr)) {
632 return false;
633 }
634 }
635 if (out_objects)
636 *out_objects = std::move(result_objects);
637 return true;
638 }
639
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)640 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
641 bool bSkip) {
642 m_pSyntax->SetPos(pos);
643 std::vector<CrossRefObjData> objects;
644 if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
645 return false;
646
647 MergeCrossRefObjectsData(objects);
648
649 return true;
650 }
651
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)652 void CPDF_Parser::MergeCrossRefObjectsData(
653 const std::vector<CrossRefObjData>& objects) {
654 for (const auto& obj : objects) {
655 m_ObjectInfo[obj.obj_num] = obj.info;
656 }
657 }
658
LoadAllCrossRefV5(FX_FILESIZE xrefpos)659 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
660 if (!LoadCrossRefV5(&xrefpos, true))
661 return false;
662
663 std::set<FX_FILESIZE> seen_xrefpos;
664 while (xrefpos) {
665 seen_xrefpos.insert(xrefpos);
666 if (!LoadCrossRefV5(&xrefpos, false))
667 return false;
668
669 // Check for circular references.
670 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
671 return false;
672 }
673 m_ObjectStreamMap.clear();
674 m_bXRefStream = true;
675 return true;
676 }
677
// Rebuilds the object table and trailer by scanning the whole file, for use
// when the regular cross-reference data is missing or unusable.
//
// Existing object info and trailer data are discarded first. The file is then
// read in blocks of kBufferSize bytes, starting at the header offset, and fed
// byte by byte through a state machine that recognizes:
//   - "<objnum> <gennum> obj": the object is parsed and its position recorded
//     in m_ObjectInfo; an XRef stream whose Root has a "Pages" entry is taken
//     as the main trailer.
//   - "trailer": the following dictionary/stream either becomes the main
//     trailer or has its entries merged into the existing one.
//   - "xref": only its position is noted.
// Comments and strings are skipped.
//
// Returns true when a trailer was found and at least one object was recorded.
bool CPDF_Parser::RebuildCrossRef() {
  m_ObjectInfo.clear();
  m_TrailerData->Clear();

  ParserState state = ParserState::kDefault;
  // Number of keyword characters matched so far in the keyword states; also
  // reused as a "just entered" flag in kHexString.
  int32_t inside_index = 0;
  uint32_t objnum = 0;
  uint32_t gennum = 0;
  // Nesting depth of parentheses while inside a literal string.
  int32_t depth = 0;
  const uint32_t kBufferSize = 4096;
  std::vector<uint8_t> buffer(kBufferSize);

  // |pos| is the file position of buffer[0]. |start_pos| and |start_pos1| are
  // the file positions where the current object-number and generation-number
  // candidates begin.
  FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
  FX_FILESIZE start_pos = 0;
  FX_FILESIZE start_pos1 = 0;
  FX_FILESIZE last_obj = -1;
  FX_FILESIZE last_xref = -1;
  FX_FILESIZE last_trailer = -1;

  while (pos < m_pSyntax->m_FileLen) {
    const FX_FILESIZE saved_pos = pos;
    // Set when a parsed object extends beyond the current block; the scan
    // then restarts from a new |pos| instead of advancing by |size|.
    bool bOverFlow = false;
    uint32_t size =
        std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
    if (!m_pSyntax->GetFileAccess()->ReadBlock(buffer.data(), pos, size))
      break;

    // Throughout the loop, "--i" followed by a state change makes the next
    // iteration look at the same byte again in the new state.
    for (uint32_t i = 0; i < size; i++) {
      uint8_t byte = buffer[i];
      switch (state) {
        // Between tokens: decide what kind of token starts here.
        case ParserState::kDefault:
          if (PDFCharIsWhitespace(byte)) {
            state = ParserState::kWhitespace;
          } else if (std::isdigit(byte)) {
            --i;
            state = ParserState::kWhitespace;
          } else if (byte == '%') {
            inside_index = 0;
            state = ParserState::kComment;
          } else if (byte == '(') {
            state = ParserState::kString;
            depth = 1;
          } else if (byte == '<') {
            inside_index = 1;
            state = ParserState::kHexString;
          } else if (byte == '\\') {
            state = ParserState::kEscapedString;
          } else if (byte == 't') {
            state = ParserState::kTrailer;
            inside_index = 1;
          }
          break;

        // After whitespace: a digit may start an object number, 't' may start
        // "trailer" and 'x' may start "xref".
        case ParserState::kWhitespace:
          if (std::isdigit(byte)) {
            start_pos = pos + i;
            state = ParserState::kObjNum;
            objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
          } else if (byte == 't') {
            state = ParserState::kTrailer;
            inside_index = 1;
          } else if (byte == 'x') {
            state = ParserState::kXref;
            inside_index = 1;
          } else if (!PDFCharIsWhitespace(byte)) {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // Accumulating the digits of a candidate object number.
        case ParserState::kObjNum:
          if (std::isdigit(byte)) {
            objnum = objnum * 10 +
                     FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
          } else if (PDFCharIsWhitespace(byte)) {
            state = ParserState::kPostObjNum;
          } else {
            --i;
            state = ParserState::kEndObj;
            inside_index = 0;
          }
          break;

        // After the object number: a digit starts the generation number.
        case ParserState::kPostObjNum:
          if (std::isdigit(byte)) {
            start_pos1 = pos + i;
            state = ParserState::kGenNum;
            gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
          } else if (byte == 't') {
            state = ParserState::kTrailer;
            inside_index = 1;
          } else if (!PDFCharIsWhitespace(byte)) {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // Accumulating the digits of a candidate generation number.
        case ParserState::kGenNum:
          if (std::isdigit(byte)) {
            gennum = gennum * 10 +
                     FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
          } else if (PDFCharIsWhitespace(byte)) {
            state = ParserState::kPostGenNum;
          } else {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // After two numbers: 'o' may start "obj". Another digit means the
        // previous pair was not an object header, so the second number
        // becomes the new object-number candidate.
        case ParserState::kPostGenNum:
          if (byte == 'o') {
            state = ParserState::kBeginObj;
            inside_index = 1;
          } else if (std::isdigit(byte)) {
            objnum = gennum;
            gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
            start_pos = start_pos1;
            start_pos1 = pos + i;
            state = ParserState::kGenNum;
          } else if (byte == 't') {
            state = ParserState::kTrailer;
            inside_index = 1;
          } else if (!PDFCharIsWhitespace(byte)) {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // Matching the remaining characters of "obj".
        case ParserState::kBeginObj:
          switch (inside_index) {
            case 1:
              if (byte != 'b') {
                --i;
                state = ParserState::kDefault;
              } else {
                inside_index++;
              }
              break;
            case 2:
              if (byte != 'j') {
                --i;
                state = ParserState::kDefault;
              } else {
                inside_index++;
              }
              break;
            case 3:
              // "obj" only counts when followed by whitespace or a delimiter.
              if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
                FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
                last_obj = start_pos;
                FX_FILESIZE obj_end = 0;
                std::unique_ptr<CPDF_Object> pObject =
                    ParseIndirectObjectAtByStrict(m_pDocument.Get(), obj_pos,
                                                  objnum, &obj_end);
                // An XRef stream with a "Size" entry whose Root dictionary
                // has a "Pages" entry is used as the main trailer.
                if (CPDF_Stream* pStream = ToStream(pObject.get())) {
                  if (CPDF_Dictionary* pDict = pStream->GetDict()) {
                    if ((pDict->KeyExist("Type")) &&
                        (pDict->GetStringFor("Type") == "XRef" &&
                         pDict->KeyExist("Size"))) {
                      CPDF_Object* pRoot = pDict->GetObjectFor("Root");
                      if (pRoot && pRoot->GetDict() &&
                          pRoot->GetDict()->GetObjectFor("Pages")) {
                        m_TrailerData->SetMainTrailer(
                            ToDictionary(pDict->Clone()));
                      }
                    }
                  }
                }

                FX_FILESIZE offset = 0;
                m_pSyntax->SetPos(obj_pos);
                offset = m_pSyntax->FindTag("obj", 0);
                if (offset == -1)
                  offset = 0;
                else
                  offset += 3;

                // Skip the body of the object that was just parsed. If it
                // ends beyond the current block, restart reading at its end.
                FX_FILESIZE nLen = obj_end - obj_pos - offset;
                if ((uint32_t)nLen > size - i) {
                  pos = obj_end + m_pSyntax->m_HeaderOffset;
                  bOverFlow = true;
                } else {
                  i += (uint32_t)nLen;
                }

                // An object number that already has a position is only
                // overwritten when this occurrence parsed successfully.
                if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
                    m_ObjectInfo[objnum].pos) {
                  if (pObject) {
                    m_ObjectInfo[objnum].pos = obj_pos;
                    m_ObjectInfo[objnum].gennum = gennum;
                  }
                } else {
                  m_ObjectInfo[objnum].pos = obj_pos;
                  m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
                  m_ObjectInfo[objnum].gennum = gennum;
                }
              }
              --i;
              state = ParserState::kDefault;
              break;
          }
          break;

        // Matching "trailer"; |inside_index| == 7 means all seven characters
        // have been seen.
        case ParserState::kTrailer:
          if (inside_index == 7) {
            if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
              last_trailer = pos + i - 7;
              m_pSyntax->SetPos(pos + i - m_pSyntax->m_HeaderOffset);

              std::unique_ptr<CPDF_Object> pObj =
                  m_pSyntax->GetObjectBody(m_pDocument.Get());
              if (pObj) {
                if (pObj->IsDictionary() || pObj->AsStream()) {
                  CPDF_Stream* pStream = pObj->AsStream();
                  if (CPDF_Dictionary* pTrailer =
                          pStream ? pStream->GetDict() : pObj->AsDictionary()) {
                    if (GetTrailer()) {
                      // A trailer already exists: merge this one into it, but
                      // only when it has no Root or its Root refers to an
                      // object with a known position.
                      CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
                      CPDF_Reference* pRef = ToReference(pRoot);
                      if (!pRoot ||
                          (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
                           m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
                        auto it = pTrailer->begin();
                        while (it != pTrailer->end()) {
                          const ByteString& key = it->first;
                          CPDF_Object* pElement = it->second.get();
                          ++it;
                          uint32_t dwObjNum =
                              pElement ? pElement->GetObjNum() : 0;
                          if (dwObjNum) {
                            GetTrailer()->SetNewFor<CPDF_Reference>(
                                key, m_pDocument.Get(), dwObjNum);
                          } else {
                            GetTrailer()->SetFor(key, pElement->Clone());
                          }
                        }
                      }
                    } else {
                      // First trailer found: it becomes the main trailer.
                      m_TrailerData->SetMainTrailer(
                          ToDictionary(pObj->IsStream() ? pTrailer->Clone()
                                                        : std::move(pObj)));

                      // If "startxref" follows directly, remember its offset.
                      // The parser position is restored afterwards.
                      FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
                      ByteString strWord = m_pSyntax->GetKeyword();
                      if (!strWord.Compare("startxref")) {
                        bool bNumber;
                        ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
                        if (bNumber)
                          m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
                      }
                      m_pSyntax->SetPos(dwSavePos);
                    }
                  }
                }
              }
            }
            --i;
            state = ParserState::kDefault;
          } else if (byte == "trailer"[inside_index]) {
            inside_index++;
          } else {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // Matching "xref"; only the position of the keyword is noted.
        case ParserState::kXref:
          if (inside_index == 4) {
            last_xref = pos + i - 4;
            state = ParserState::kWhitespace;
          } else if (byte == "xref"[inside_index]) {
            inside_index++;
          } else {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // Comments run to the end of the line.
        case ParserState::kComment:
          if (PDFCharIsLineEnding(byte))
            state = ParserState::kDefault;
          break;

        // Literal string: track nested parentheses until they balance.
        case ParserState::kString:
          if (byte == ')') {
            if (depth > 0)
              depth--;
          } else if (byte == '(') {
            depth++;
          }

          if (!depth)
            state = ParserState::kDefault;
          break;

        // After '<': a '>' ends the state. A second '<' immediately after the
        // first also ends it (presumably the "<<" that opens a dictionary -
        // TODO confirm).
        case ParserState::kHexString:
          if (byte == '>' || (byte == '<' && inside_index == 1))
            state = ParserState::kDefault;
          inside_index = 0;
          break;

        // After a backslash: skip until a delimiter or whitespace.
        case ParserState::kEscapedString:
          if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
            --i;
            state = ParserState::kDefault;
          }
          break;

        // A number was followed directly by a non-digit, non-whitespace
        // character; consume a run matching "endobj" before going back to the
        // default state.
        case ParserState::kEndObj:
          if (PDFCharIsWhitespace(byte)) {
            state = ParserState::kDefault;
          } else if (byte == '%' || byte == '(' || byte == '<' ||
                     byte == '\\') {
            state = ParserState::kDefault;
            --i;
          } else if (inside_index == 6) {
            state = ParserState::kDefault;
            --i;
          } else if (byte == "endobj"[inside_index]) {
            inside_index++;
          }
          break;
      }

      if (bOverFlow) {
        // |pos| already points past the parsed object; do not advance it by
        // the block size below.
        size = 0;
        break;
      }
    }
    pos += size;

    // If the position has not changed at all or went backwards in a loop
    // iteration, then break out to prevent infinite looping.
    if (pos <= saved_pos)
      break;
  }

  // NOTE(review): |last_trailer| is updated here but not read afterwards
  // within this function.
  if (last_xref != -1 && last_xref > last_obj)
    last_trailer = last_xref;
  else if (last_trailer == -1 || last_xref < last_obj)
    last_trailer = m_pSyntax->m_FileLen;

  return GetTrailer() && !m_ObjectInfo.empty();
}
1022
// Loads a single cross-reference stream found at |*pos| and merges its
// entries into |m_ObjectInfo|.
//
// |pos|       In: file offset of the cross-reference stream object.
//             Out: once the stream dictionary has been reached, overwritten
//             with the dictionary's "Prev" value (this happens before "Size"
//             is validated). Left untouched on the earlier failure paths.
// |bMainXRef| When true, the stream dictionary becomes the main trailer, the
//             object map is shrunk to "Size" and every known entry is reset
//             to kFree before the stream is applied. When false, the
//             dictionary is appended as an additional trailer.
//
// Returns false when the object cannot be parsed, has object number 0, shares
// its object number with the document root, is rejected by the document, is
// not a stream, has a negative "Size", lacks a usable "W" array, or when a
// compressed entry names an invalid object stream number.
bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
  std::unique_ptr<CPDF_Object> pObject(
      ParseIndirectObjectAt(m_pDocument.Get(), *pos, 0));
  if (!pObject)
    return false;

  uint32_t objnum = pObject->GetObjNum();
  if (!objnum)
    return false;

  // Ownership of |pObject| may be moved into the document just below, so keep
  // a raw pointer for the rest of this function. Presumably the document
  // keeps the object alive after a successful replace - confirm against
  // ReplaceIndirectObjectIfHigherGeneration().
  CPDF_Object* pUnownedObject = pObject.get();
  if (m_pDocument) {
    // The cross-reference stream must not reuse the root dictionary's object
    // number.
    const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
    if (pRootDict && pRootDict->GetObjNum() == objnum)
      return false;
    if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
            objnum, std::move(pObject))) {
      return false;
    }
  }

  CPDF_Stream* pStream = pUnownedObject->AsStream();
  if (!pStream)
    return false;

  CPDF_Dictionary* pDict = pStream->GetDict();
  // Hand the previous cross-reference position back to the caller.
  *pos = pDict->GetIntegerFor("Prev");
  int32_t size = pDict->GetIntegerFor("Size");
  if (size < 0)
    return false;

  std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
  if (bMainXRef) {
    m_TrailerData->SetMainTrailer(std::move(pNewTrailer));
    ShrinkObjectMap(size);
    // Start from a clean slate: only kFree entries are filled in below.
    for (auto& it : m_ObjectInfo)
      it.second.type = ObjectType::kFree;
  } else {
    m_TrailerData->AppendTrailer(std::move(pNewTrailer));
  }

  // Collect the (first object number, entry count) pairs from "Index". Pairs
  // that are not two numbers, have a negative start, or a non-positive count
  // are dropped.
  std::vector<std::pair<int32_t, int32_t>> arrIndex;
  CPDF_Array* pArray = pDict->GetArrayFor("Index");
  if (pArray) {
    for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
      CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
      CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);

      if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
        int nStartNum = pStartNumObj->GetInteger();
        int nCount = pCountObj->GetInteger();
        if (nStartNum >= 0 && nCount > 0)
          arrIndex.push_back(std::make_pair(nStartNum, nCount));
      }
    }
  }

  // With no usable "Index" pair, fall back to one section covering
  // [0, size).
  if (arrIndex.size() == 0)
    arrIndex.push_back(std::make_pair(0, size));

  // "W" holds the byte width of each field of an entry.
  pArray = pDict->GetArrayFor("W");
  if (!pArray)
    return false;

  std::vector<uint32_t> WidthArray;
  FX_SAFE_UINT32 dwAccWidth = 0;
  for (size_t i = 0; i < pArray->GetCount(); ++i) {
    WidthArray.push_back(pArray->GetIntegerAt(i));
    dwAccWidth += WidthArray[i];
  }

  // At least three widths are required and their sum must fit in a uint32_t.
  if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
    return false;

  // Size in bytes of one entry (all fields together).
  uint32_t totalWidth = dwAccWidth.ValueOrDie();
  auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
  pAcc->LoadAllDataFiltered();

  const uint8_t* pData = pAcc->GetData();
  uint32_t dwTotalSize = pAcc->GetSize();
  // Number of entries of the stream data consumed by earlier sections.
  uint32_t segindex = 0;
  for (uint32_t i = 0; i < arrIndex.size(); i++) {
    int32_t startnum = arrIndex[i].first;
    if (startnum < 0)
      continue;

    uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
    // Skip the section when its entries would run past the decoded data (or
    // the byte count overflows).
    // NOTE(review): this `continue`, and the one after the dwV5Size check
    // below, do not advance |segindex|, so the next section is read from the
    // same data offset - confirm this is intended.
    FX_SAFE_UINT32 dwCaculatedSize = segindex;
    dwCaculatedSize += count;
    dwCaculatedSize *= totalWidth;
    if (!dwCaculatedSize.IsValid() ||
        dwCaculatedSize.ValueOrDie() > dwTotalSize) {
      continue;
    }

    const uint8_t* segstart = pData + segindex * totalWidth;
    // Skip the section when it names object numbers beyond the last object
    // number currently known to the parser.
    FX_SAFE_UINT32 dwMaxObjNum = startnum;
    dwMaxObjNum += count;
    uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
    if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
      continue;

    for (uint32_t j = 0; j < count; j++) {
      // When the first field has zero width the entry type defaults to
      // kNotCompressed.
      ObjectType type = ObjectType::kNotCompressed;
      const uint8_t* entrystart = segstart + j * totalWidth;
      if (WidthArray[0]) {
        const int cross_ref_stream_obj_type =
            GetVarInt(entrystart, WidthArray[0]);
        type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
      }

      // Entries already typed kNull (the compressed branch below assigns that
      // type to referenced object streams) only get their position recorded;
      // their type is left as is.
      if (GetObjectType(startnum + j) == ObjectType::kNull) {
        FX_FILESIZE offset =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        m_ObjectInfo[startnum + j].pos = offset;
        continue;
      }

      // Anything that is no longer kFree has already been set and is kept.
      if (GetObjectType(startnum + j) != ObjectType::kFree)
        continue;

      ObjectInfo& info = m_ObjectInfo[startnum + j];

      info.type = type;
      if (type == ObjectType::kFree) {
        info.pos = 0;
      } else {
        // Only the first two fields of an entry are read here; any further
        // widths just contribute to the entry stride.
        const FX_FILESIZE entry_value =
            GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        if (type == ObjectType::kNotCompressed) {
          const auto object_offset = entry_value;
          info.pos = object_offset;
        } else {
          // For the remaining types the second field is stored as the number
          // of the containing object stream; that stream's own entry is
          // marked kNull.
          const auto archive_obj_num = entry_value;
          info.archive_obj_num = archive_obj_num;
          if (archive_obj_num < 0 || !IsValidObjectNumber(archive_obj_num))
            return false;
          m_ObjectInfo[archive_obj_num].type = ObjectType::kNull;
        }
      }
    }
    segindex += count;
  }
  return true;
}
1168
GetIDArray() const1169 const CPDF_Array* CPDF_Parser::GetIDArray() const {
1170 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
1171 }
1172
GetTrailer() const1173 CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
1174 return m_TrailerData->GetMainTrailer();
1175 }
1176
GetCombinedTrailer() const1177 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
1178 return m_TrailerData->GetCombinedTrailer();
1179 }
1180
GetInfoObjNum()1181 uint32_t CPDF_Parser::GetInfoObjNum() {
1182 return m_TrailerData->GetInfoObjNum();
1183 }
1184
GetRootObjNum()1185 uint32_t CPDF_Parser::GetRootObjNum() {
1186 return m_TrailerData->GetRootObjNum();
1187 }
1188
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum)1189 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
1190 CPDF_IndirectObjectHolder* pObjList,
1191 uint32_t objnum) {
1192 if (!IsValidObjectNumber(objnum))
1193 return nullptr;
1194
1195 // Prevent circular parsing the same object.
1196 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1197 return nullptr;
1198
1199 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1200 if (GetObjectType(objnum) == ObjectType::kNotCompressed ||
1201 GetObjectType(objnum) == ObjectType::kNull) {
1202 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1203 if (pos <= 0)
1204 return nullptr;
1205 return ParseIndirectObjectAt(pObjList, pos, objnum);
1206 }
1207 if (GetObjectType(objnum) != ObjectType::kCompressed)
1208 return nullptr;
1209
1210 RetainPtr<CPDF_StreamAcc> pObjStream =
1211 GetObjectStream(m_ObjectInfo[objnum].pos);
1212 if (!pObjStream)
1213 return nullptr;
1214
1215 auto file = pdfium::MakeRetain<CFX_MemoryStream>(
1216 const_cast<uint8_t*>(pObjStream->GetData()),
1217 static_cast<size_t>(pObjStream->GetSize()), false);
1218 CPDF_SyntaxParser syntax;
1219 syntax.InitParser(file, 0);
1220 const int32_t offset = GetStreamFirst(pObjStream);
1221
1222 // Read object numbers from |pObjStream| into a cache.
1223 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1224 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1225 uint32_t thisnum = syntax.GetDirectNum();
1226 uint32_t thisoff = syntax.GetDirectNum();
1227 m_ObjCache[pObjStream][thisnum] = thisoff;
1228 }
1229 }
1230
1231 const auto it = m_ObjCache[pObjStream].find(objnum);
1232 if (it == m_ObjCache[pObjStream].end())
1233 return nullptr;
1234
1235 syntax.SetPos(offset + it->second);
1236 return syntax.GetObjectBody(pObjList);
1237 }
1238
GetObjectStream(uint32_t objnum)1239 RetainPtr<CPDF_StreamAcc> CPDF_Parser::GetObjectStream(uint32_t objnum) {
1240 auto it = m_ObjectStreamMap.find(objnum);
1241 if (it != m_ObjectStreamMap.end())
1242 return it->second;
1243
1244 if (!m_pDocument)
1245 return nullptr;
1246
1247 const CPDF_Stream* pStream =
1248 ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
1249 if (!pStream)
1250 return nullptr;
1251
1252 auto pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
1253 pStreamAcc->LoadAllDataFiltered();
1254 m_ObjectStreamMap[objnum] = pStreamAcc;
1255 return pStreamAcc;
1256 }
1257
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum)1258 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
1259 CPDF_IndirectObjectHolder* pObjList,
1260 FX_FILESIZE pos,
1261 uint32_t objnum) {
1262 return ParseIndirectObjectAtInternal(
1263 pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr);
1264 }
1265
ParseIndirectObjectAtInternal(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,CPDF_SyntaxParser::ParseType parse_type,FX_FILESIZE * pResultPos)1266 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal(
1267 CPDF_IndirectObjectHolder* pObjList,
1268 FX_FILESIZE pos,
1269 uint32_t objnum,
1270 CPDF_SyntaxParser::ParseType parse_type,
1271 FX_FILESIZE* pResultPos) {
1272 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1273 m_pSyntax->SetPos(pos);
1274 auto result = m_pSyntax->GetIndirectObject(pObjList, parse_type);
1275
1276 if (pResultPos)
1277 *pResultPos = m_pSyntax->GetPos();
1278 m_pSyntax->SetPos(saved_pos);
1279
1280 if (result && objnum && result->GetObjNum() != objnum)
1281 return nullptr;
1282
1283 const bool should_decrypt = m_pSecurityHandler &&
1284 m_pSecurityHandler->GetCryptoHandler() &&
1285 objnum != m_MetadataObjnum;
1286 if (should_decrypt)
1287 result = m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(
1288 std::move(result));
1289
1290 return result;
1291 }
1292
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,FX_FILESIZE * pResultPos)1293 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
1294 CPDF_IndirectObjectHolder* pObjList,
1295 FX_FILESIZE pos,
1296 uint32_t objnum,
1297 FX_FILESIZE* pResultPos) {
1298 return ParseIndirectObjectAtInternal(
1299 pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos);
1300 }
1301
GetFirstPageNo() const1302 uint32_t CPDF_Parser::GetFirstPageNo() const {
1303 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1304 }
1305
LoadTrailerV4()1306 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1307 if (m_pSyntax->GetKeyword() != "trailer")
1308 return nullptr;
1309
1310 return ToDictionary(m_pSyntax->GetObjectBody(m_pDocument.Get()));
1311 }
1312
GetPermissions() const1313 uint32_t CPDF_Parser::GetPermissions() const {
1314 if (!m_pSecurityHandler)
1315 return 0xFFFFFFFF;
1316
1317 uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
1318 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
1319 // See PDF Reference 1.7, page 123, table 3.20.
1320 dwPermission &= 0xFFFFFFFC;
1321 dwPermission |= 0xFFFFF0C0;
1322 }
1323 return dwPermission;
1324 }
1325
ParseLinearizedHeader()1326 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
1327 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1328 }
1329
// Starts parsing |pFileAccess| into |pDocument|, using the linearized header
// when one is present and deferring to StartParseInternal() otherwise.
//
// Returns FORMAT_ERROR when the syntax parser cannot be initialized, when the
// cross-reference data can neither be loaded nor rebuilt, or when no usable
// root is found; returns the error from SetEncryptHandler() when that fails;
// SUCCESS otherwise. Must only be called on a parser that has not parsed yet.
CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
    const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    CPDF_Document* pDocument) {
  ASSERT(!m_bHasParsed);
  m_bXRefStream = false;
  m_LastXRefOffset = 0;

  if (!InitSyntaxParser(pFileAccess))
    return FORMAT_ERROR;

  // No linearized header: continue with the regular parsing path.
  m_pLinearized = ParseLinearizedHeader();
  if (!m_pLinearized)
    return StartParseInternal(std::move(pDocument));

  m_bHasParsed = true;
  m_pDocument = pDocument;

  // Load the cross-reference data the linearized header points at: first as
  // a V4 table, then as a V5 stream, and as a last resort rebuild it.
  m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
  FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
  bool bXRefRebuilt = false;
  bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
  if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    bXRefRebuilt = true;
    m_LastXRefOffset = 0;
  }
  if (bLoadV4) {
    // A V4 table is followed by its trailer; make it the main trailer and
    // shrink the object map to the trailer's "Size" when that is positive.
    // NOTE(review): a missing trailer returns SUCCESS here, leaving the main
    // trailer unset and skipping the encryption/document setup below -
    // confirm this is intended.
    std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
    if (!trailer)
      return SUCCESS;

    m_TrailerData->SetMainTrailer(std::move(trailer));
    int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
    if (xrefsize > 0)
      ShrinkObjectMap(xrefsize);
  }

  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // The document has no root or no pages. Unless the cross-reference data
    // was already rebuilt above, rebuild it once and load again.
    if (bXRefRebuilt)
      return FORMAT_ERROR;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;

    m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
    if (!m_pDocument->GetRoot())
      return FORMAT_ERROR;
  }

  // The trailer data reports no root object number: rebuild and require one.
  if (GetRootObjNum() == 0) {
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }

  // When the security handler reports IsMetadataEncrypted(), remember which
  // object the root's "Metadata" entry references.
  // ParseIndirectObjectAtInternal() skips decryption for that object number.
  if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
    if (CPDF_Reference* pMetadata =
            ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
      m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
1408
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1409 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1410 if (!LoadCrossRefV5(&xrefpos, false))
1411 return false;
1412
1413 std::set<FX_FILESIZE> seen_xrefpos;
1414 while (xrefpos) {
1415 seen_xrefpos.insert(xrefpos);
1416 if (!LoadCrossRefV5(&xrefpos, false))
1417 return false;
1418
1419 // Check for circular references.
1420 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1421 return false;
1422 }
1423 m_ObjectStreamMap.clear();
1424 m_bXRefStream = true;
1425 return true;
1426 }
1427
LoadLinearizedMainXRefTable()1428 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1429 const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev");
1430 if (!main_xref_offset.IsValid())
1431 return FORMAT_ERROR;
1432
1433 if (main_xref_offset.ValueOrDie() == 0)
1434 return SUCCESS;
1435
1436 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1437 m_MetadataObjnum = 0;
1438 m_ObjectStreamMap.clear();
1439 m_ObjCache.clear();
1440
1441 if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
1442 !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
1443 m_LastXRefOffset = 0;
1444 return FORMAT_ERROR;
1445 }
1446
1447 return SUCCESS;
1448 }
1449
GetObjectTypeFromCrossRefStreamType(int cross_ref_stream_type) const1450 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType(
1451 int cross_ref_stream_type) const {
1452 switch (cross_ref_stream_type) {
1453 case 0:
1454 return CPDF_Parser::ObjectType::kFree;
1455 case 1:
1456 return CPDF_Parser::ObjectType::kNotCompressed;
1457 case 2:
1458 return CPDF_Parser::ObjectType::kCompressed;
1459 default:
1460 return CPDF_Parser::ObjectType::kNull;
1461 }
1462 }
1463