1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_object_stream.h"
20 #include "core/fpdfapi/parser/cpdf_read_validator.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_security_handler.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/ptr_util.h"
31 #include "third_party/base/stl_util.h"
32
33 namespace {
34
// A limit on the size of the xref table. Theoretical limits are higher, but
// this may be large enough in practice.
const int32_t kMaxXRefSize = 1048576;

// Size in bytes of a header line of the form "%PDF-1.7\n".
constexpr FX_FILESIZE kPDFHeaderSize = 9;
41
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer. Only
// the low 32 bits survive when |n| is larger than 4, and a non-positive |n|
// yields 0.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  const uint8_t* const end = n > 0 ? p + n : p;
  for (const uint8_t* cur = p; cur != end; ++cur)
    value = (value << 8) | *cur;
  return value;
}
48
// Minimal objects holder whose TryInit() has nothing to set up and therefore
// always reports success. The parser installs one of these when it is
// constructed without an external holder.
class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
 public:
  ObjectsHolderStub() = default;
  ~ObjectsHolderStub() override = default;
  bool TryInit() override { return true; }
};
55
56 } // namespace
57
CPDF_Parser(ParsedObjectsHolder * holder)58 CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder)
59 : m_pObjectsHolder(holder),
60 m_CrossRefTable(pdfium::MakeUnique<CPDF_CrossRefTable>()) {
61 if (!holder) {
62 m_pOwnedObjectsHolder = pdfium::MakeUnique<ObjectsHolderStub>();
63 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
64 }
65 }
66
// Creates a parser without an external objects holder by delegating to the
// holder-taking constructor with a null holder.
CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {}
68
// Releases the security handler reference before the remaining members are
// destroyed.
CPDF_Parser::~CPDF_Parser() {
  ReleaseEncryptHandler();
}
72
GetLastObjNum() const73 uint32_t CPDF_Parser::GetLastObjNum() const {
74 return m_CrossRefTable->objects_info().empty()
75 ? 0
76 : m_CrossRefTable->objects_info().rbegin()->first;
77 }
78
IsValidObjectNumber(uint32_t objnum) const79 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
80 return objnum <= GetLastObjNum();
81 }
82
GetObjectPositionOrZero(uint32_t objnum) const83 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
84 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
85 return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
86 }
87
GetObjectType(uint32_t objnum) const88 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
89 ASSERT(IsValidObjectNumber(objnum));
90 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
91 return info ? info->type : ObjectType::kFree;
92 }
93
IsObjectFreeOrNull(uint32_t objnum) const94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
95 switch (GetObjectType(objnum)) {
96 case ObjectType::kFree:
97 case ObjectType::kNull:
98 return true;
99 case ObjectType::kNotCompressed:
100 case ObjectType::kCompressed:
101 return false;
102 }
103 NOTREACHED();
104 return false;
105 }
106
IsObjectFree(uint32_t objnum) const107 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
108 return GetObjectType(objnum) == ObjectType::kFree;
109 }
110
// Forwards |size| to CPDF_CrossRefTable::ShrinkObjectMap() on the current
// cross reference table.
void CPDF_Parser::ShrinkObjectMap(uint32_t size) {
  m_CrossRefTable->ShrinkObjectMap(size);
}
114
InitSyntaxParser(const RetainPtr<CPDF_ReadValidator> & validator)115 bool CPDF_Parser::InitSyntaxParser(
116 const RetainPtr<CPDF_ReadValidator>& validator) {
117 const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
118 if (!header_offset)
119 return false;
120 if (validator->GetSize() < *header_offset + kPDFHeaderSize)
121 return false;
122
123 m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>(validator, *header_offset);
124 return ParseFileVersion();
125 }
126
ParseFileVersion()127 bool CPDF_Parser::ParseFileVersion() {
128 m_FileVersion = 0;
129 uint8_t ch;
130 if (!m_pSyntax->GetCharAt(5, ch))
131 return false;
132
133 if (std::isdigit(ch))
134 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
135
136 if (!m_pSyntax->GetCharAt(7, ch))
137 return false;
138
139 if (std::isdigit(ch))
140 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
141 return true;
142 }
143
StartParse(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,const char * password)144 CPDF_Parser::Error CPDF_Parser::StartParse(
145 const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
146 const char* password) {
147 if (!InitSyntaxParser(
148 pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr)))
149 return FORMAT_ERROR;
150 SetPassword(password);
151 return StartParseInternal();
152 }
153
// Runs the non-linearized parse on an already initialized syntax parser:
// loads the cross reference data (rebuilding it by scanning the file when it
// cannot be loaded), sets up the security handler and makes sure the document
// root can be reached. Must only be called once per parser. Returns SUCCESS
// or the first error encountered.
CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
  ASSERT(!m_bHasParsed);
  ASSERT(!m_bXRefTableRebuilt);
  m_bHasParsed = true;
  m_bXRefStream = false;

  m_LastXRefOffset = ParseStartXRef();
  // ParseStartXRef() returns 0 on failure, and an offset that falls inside
  // the header is not usable either; both cases go straight to a rebuild.
  if (m_LastXRefOffset >= kPDFHeaderSize) {
    // Try a classic xref table first, then a cross reference stream, and
    // only rebuild when neither can be loaded.
    if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
        !LoadAllCrossRefV5(m_LastXRefOffset)) {
      if (!RebuildCrossRef())
        return FORMAT_ERROR;

      m_bXRefTableRebuilt = true;
      m_LastXRefOffset = 0;
    }
  } else {
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    m_bXRefTableRebuilt = true;
  }
  Error eRet = SetEncryptHandler();
  if (eRet != SUCCESS)
    return eRet;

  // If the root cannot be obtained or the holder fails to initialize, retry
  // once with a rebuilt table, unless the table was already rebuilt above.
  if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
    if (m_bXRefTableRebuilt)
      return FORMAT_ERROR;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;

    // The result of TryInit() is not checked on this second attempt; only
    // the presence of the root decides success.
    m_pObjectsHolder->TryInit();
    if (!GetRoot())
      return FORMAT_ERROR;
  }
  // The trailer must reference the root indirectly; otherwise rebuild.
  if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
      return FORMAT_ERROR;

    eRet = SetEncryptHandler();
    if (eRet != SUCCESS)
      return eRet;
  }
  // When metadata is not encrypted, remember its object number so that
  // ParseIndirectObjectAt() does not try to decrypt that object.
  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    CPDF_Reference* pMetadata =
        ToReference(GetRoot()->GetObjectFor("Metadata"));
    if (pMetadata)
      m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return SUCCESS;
}
213
ParseStartXRef()214 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
215 static constexpr char kStartXRefKeyword[] = "startxref";
216 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
217 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
218 return 0;
219
220 // Skip "startxref" keyword.
221 m_pSyntax->GetKeyword();
222
223 // Read XRef offset.
224 bool bNumber;
225 const ByteString xref_offset_str = m_pSyntax->GetNextWord(&bNumber);
226 if (!bNumber || xref_offset_str.IsEmpty())
227 return 0;
228
229 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_str.c_str());
230 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
231 return 0;
232
233 return result.ValueOrDie();
234 }
235
SetEncryptHandler()236 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
237 ReleaseEncryptHandler();
238 if (!GetTrailer())
239 return FORMAT_ERROR;
240
241 const CPDF_Dictionary* pEncryptDict = GetEncryptDict();
242 if (!pEncryptDict)
243 return SUCCESS;
244
245 if (pEncryptDict->GetStringFor("Filter") != "Standard")
246 return HANDLER_ERROR;
247
248 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
249 if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
250 return PASSWORD_ERROR;
251
252 m_pSecurityHandler = std::move(pSecurityHandler);
253 return SUCCESS;
254 }
255
// Drops the parser's reference to the current security handler, if any.
void CPDF_Parser::ReleaseEncryptHandler() {
  m_pSecurityHandler.Reset();
}
259
260 // Ideally, all the cross reference entries should be verified.
261 // In reality, we rarely see well-formed cross references don't match
262 // with the objects. crbug/602650 showed a case where object numbers
263 // in the cross reference table are all off by one.
VerifyCrossRefV4()264 bool CPDF_Parser::VerifyCrossRefV4() {
265 for (const auto& it : m_CrossRefTable->objects_info()) {
266 if (it.second.pos == 0)
267 continue;
268 // Find the first non-zero position.
269 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
270 m_pSyntax->SetPos(it.second.pos);
271 bool is_num = false;
272 ByteString num_str = m_pSyntax->GetNextWord(&is_num);
273 m_pSyntax->SetPos(SavedPos);
274 if (!is_num || num_str.IsEmpty() ||
275 FXSYS_atoui(num_str.c_str()) != it.first) {
276 // If the object number read doesn't match the one stored,
277 // something is wrong with the cross reference table.
278 return false;
279 }
280 break;
281 }
282 return true;
283 }
284
// Loads the classic cross reference table at |xref_offset| together with
// every earlier table reachable through the trailers' /Prev entries.
//
// The work is done in two passes. The first pass skips over the entries and
// only collects the trailers and section offsets; the second pass revisits
// every collected section and loads its entries, plus the cross reference
// stream named by /XRefStm when there is one. Returns false on any parse
// failure, on a /Prev cycle, or when the table fails verification.
bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
  // Skip the entries of the newest section to reach its trailer.
  if (!LoadCrossRefV4(xref_offset, true))
    return false;

  RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
  if (!trailer)
    return false;

  m_CrossRefTable->SetTrailer(std::move(trailer));
  int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
  if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
    ShrinkObjectMap(xrefsize);

  // |xref_list| and |xref_stream_list| are kept in step: entry i of the
  // second list is the /XRefStm value of the section at entry i of the first.
  std::vector<FX_FILESIZE> xref_stream_list{
      GetDirectInteger(GetTrailer(), "XRefStm")};
  std::vector<FX_FILESIZE> xref_list{xref_offset};
  std::set<FX_FILESIZE> seen_xref_offset{xref_offset};

  // When the trailer doesn't have Prev entry or Prev entry value is not
  // numerical, GetDirectInteger() returns 0. Loading will end.
  xref_offset = GetDirectInteger(GetTrailer(), "Prev");
  while (xref_offset) {
    // Check for circular references.
    if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
      return false;

    seen_xref_offset.insert(xref_offset);

    // SLOW ...
    // Inserting at the front means the section reached last through /Prev
    // ends up first in the list.
    xref_list.insert(xref_list.begin(), xref_offset);
    // The result is not checked here; LoadTrailerV4() below fails unless the
    // next keyword is "trailer".
    LoadCrossRefV4(xref_offset, true);

    RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
    if (!pDict)
      return false;

    xref_offset = GetDirectInteger(pDict.Get(), "Prev");

    // SLOW ...
    xref_stream_list.insert(xref_stream_list.begin(),
                            pDict->GetIntegerFor("XRefStm"));

    m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
        pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pDict)),
        std::move(m_CrossRefTable));
  }

  // Second pass: load the entries of every section in list order.
  for (size_t i = 0; i < xref_list.size(); ++i) {
    if (!LoadCrossRefV4(xref_list[i], false))
      return false;

    if (xref_stream_list[i] && !LoadCrossRefV5(&xref_stream_list[i], false))
      return false;

    // Sanity-check the table once, after the first section has been loaded.
    if (i == 0 && !VerifyCrossRefV4())
      return false;
  }
  return true;
}
344
// Loads the main classic cross reference table of a linearized document at
// |main_xref_offset|, plus every earlier table reachable through /Prev, and
// merges them underneath the already loaded first-page table.
//
// Returns false on any parse failure, when the first-page trailer has no
// usable /Size, or when the /Prev chain contains a cycle.
bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
  // Unlike LoadAllCrossRefV4(), the entries of the main table are loaded
  // right away rather than skipped.
  if (!LoadCrossRefV4(main_xref_offset, false))
    return false;

  RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
  if (!main_trailer)
    return false;

  // GetTrailer() currently returns the first-page trailer.
  if (GetDirectInteger(GetTrailer(), "Size") == 0)
    return false;

  // Read /XRefStm from the first-page trailer. No need to read /Prev for the
  // first-page trailer, as the caller already did that and passed it in as
  // |main_xref_offset|.
  std::vector<FX_FILESIZE> xref_stream_list{
      GetDirectInteger(GetTrailer(), "XRefStm")};
  std::vector<FX_FILESIZE> xref_list{main_xref_offset};
  std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};

  // Merge the trailers.
  m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
      pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(main_trailer)),
      std::move(m_CrossRefTable));

  // Now GetTrailer() returns the merged trailer, where /Prev is from the
  // main-trailer.
  FX_FILESIZE xref_offset = GetDirectInteger(GetTrailer(), "Prev");
  while (xref_offset) {
    // Check for circular references.
    if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
      return false;

    seen_xref_offset.insert(xref_offset);

    // SLOW ...
    // Inserting at the front keeps |main_xref_offset| as the last element.
    xref_list.insert(xref_list.begin(), xref_offset);
    // The result is not checked here; LoadTrailerV4() below fails unless the
    // next keyword is "trailer".
    LoadCrossRefV4(xref_offset, true);

    RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
    if (!pDict)
      return false;

    xref_offset = GetDirectInteger(pDict.Get(), "Prev");

    // SLOW ...
    xref_stream_list.insert(xref_stream_list.begin(),
                            pDict->GetIntegerFor("XRefStm"));

    m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
        pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pDict)),
        std::move(m_CrossRefTable));
  }

  // Only the cross reference stream is loaded for index 0; the classic
  // sections are loaded from index 1 onwards.
  if (xref_stream_list[0] && !LoadCrossRefV5(&xref_stream_list[0], false))
    return false;

  for (size_t i = 1; i < xref_list.size(); ++i) {
    if (!LoadCrossRefV4(xref_list[i], false))
      return false;

    if (xref_stream_list[i] && !LoadCrossRefV5(&xref_stream_list[i], false))
      return false;
  }
  return true;
}
411
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)412 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
413 uint32_t start_objnum,
414 uint32_t count,
415 std::vector<CrossRefObjData>* out_objects) {
416 if (!count)
417 return true;
418
419 // Each entry shall be exactly 20 byte.
420 // A sample entry looks like:
421 // "0000000000 00007 f\r\n"
422 static constexpr int32_t kEntryConstSize = 20;
423
424 if (!out_objects) {
425 FX_SAFE_FILESIZE pos = count;
426 pos *= kEntryConstSize;
427 pos += m_pSyntax->GetPos();
428 if (!pos.IsValid())
429 return false;
430 m_pSyntax->SetPos(pos.ValueOrDie());
431 return true;
432 }
433 const size_t start_obj_index = out_objects->size();
434 FX_SAFE_SIZE_T new_size = start_obj_index;
435 new_size += count;
436 if (!new_size.IsValid())
437 return false;
438
439 if (new_size.ValueOrDie() > kMaxXRefSize)
440 return false;
441
442 const size_t max_entries_in_file =
443 m_pSyntax->GetDocumentSize() / kEntryConstSize;
444 if (new_size.ValueOrDie() > max_entries_in_file)
445 return false;
446
447 out_objects->resize(new_size.ValueOrDie());
448
449 std::vector<char> buf(1024 * kEntryConstSize + 1);
450 buf.back() = '\0';
451
452 uint32_t nBytesToRead = count;
453 while (nBytesToRead > 0) {
454 const uint32_t block_size = std::min(nBytesToRead, 1024u);
455 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
456 block_size * kEntryConstSize)) {
457 return false;
458 }
459
460 for (uint32_t i = 0; i < block_size; i++) {
461 uint32_t iObjectIndex = count - nBytesToRead + i;
462 CrossRefObjData& obj_data =
463 (*out_objects)[start_obj_index + iObjectIndex];
464 const uint32_t objnum = start_objnum + iObjectIndex;
465 obj_data.obj_num = objnum;
466 ObjectInfo& info = obj_data.info;
467
468 char* pEntry = &buf[i * kEntryConstSize];
469 if (pEntry[17] == 'f') {
470 info.pos = 0;
471 info.type = ObjectType::kFree;
472 } else {
473 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
474 if (!offset.IsValid())
475 return false;
476
477 if (offset.ValueOrDie() == 0) {
478 for (int32_t c = 0; c < 10; c++) {
479 if (!std::isdigit(pEntry[c]))
480 return false;
481 }
482 }
483
484 info.pos = offset.ValueOrDie();
485
486 // TODO(art-snake): The info.gennum is uint16_t, but version may be
487 // greated than max<uint16_t>. Needs solve this issue.
488 const int32_t version = FXSYS_atoi(pEntry + 11);
489 info.gennum = version;
490 info.type = ObjectType::kNotCompressed;
491 }
492 }
493 nBytesToRead -= block_size;
494 }
495 return true;
496 }
497
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)498 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
499 if (out_objects)
500 out_objects->clear();
501
502 if (m_pSyntax->GetKeyword() != "xref")
503 return false;
504 std::vector<CrossRefObjData> result_objects;
505 while (1) {
506 FX_FILESIZE saved_pos = m_pSyntax->GetPos();
507 bool bIsNumber;
508 ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
509 if (word.IsEmpty())
510 return false;
511
512 if (!bIsNumber) {
513 m_pSyntax->SetPos(saved_pos);
514 break;
515 }
516
517 uint32_t start_objnum = FXSYS_atoui(word.c_str());
518 if (start_objnum >= kMaxObjectNumber)
519 return false;
520
521 uint32_t count = m_pSyntax->GetDirectNum();
522 m_pSyntax->ToNextWord();
523
524 if (!ParseAndAppendCrossRefSubsectionData(
525 start_objnum, count, out_objects ? &result_objects : nullptr)) {
526 return false;
527 }
528 }
529 if (out_objects)
530 *out_objects = std::move(result_objects);
531 return true;
532 }
533
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)534 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
535 m_pSyntax->SetPos(pos);
536 std::vector<CrossRefObjData> objects;
537 if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
538 return false;
539
540 MergeCrossRefObjectsData(objects);
541 return true;
542 }
543
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)544 void CPDF_Parser::MergeCrossRefObjectsData(
545 const std::vector<CrossRefObjData>& objects) {
546 for (const auto& obj : objects) {
547 switch (obj.info.type) {
548 case ObjectType::kFree:
549 if (obj.info.gennum > 0)
550 m_CrossRefTable->SetFree(obj.obj_num);
551 break;
552 case ObjectType::kNormal:
553 case ObjectType::kObjStream:
554 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
555 break;
556 case ObjectType::kCompressed:
557 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive_obj_num);
558 break;
559 default:
560 NOTREACHED();
561 }
562 }
563 }
564
LoadAllCrossRefV5(FX_FILESIZE xref_offset)565 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
566 if (!LoadCrossRefV5(&xref_offset, true))
567 return false;
568
569 std::set<FX_FILESIZE> seen_xref_offset;
570 while (xref_offset) {
571 seen_xref_offset.insert(xref_offset);
572 if (!LoadCrossRefV5(&xref_offset, false))
573 return false;
574
575 // Check for circular references.
576 if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
577 return false;
578 }
579 m_ObjectStreamMap.clear();
580 m_bXRefStream = true;
581 return true;
582 }
583
// Reconstructs the cross reference table by scanning the whole file, word by
// word, for "N G obj" object headers and "trailer" dictionaries. The result
// is merged with the existing table. Returns true when the merged table has
// both a trailer and at least one object entry.
bool CPDF_Parser::RebuildCrossRef() {
  auto cross_ref_table = pdfium::MakeUnique<CPDF_CrossRefTable>();

  const uint32_t kBufferSize = 4096;
  m_pSyntax->SetReadBufferSize(kBufferSize);
  m_pSyntax->SetPos(0);

  bool bIsNumber;
  // Holds at most the last two consecutive numbers seen, each paired with
  // the file position at which it starts.
  std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
  for (ByteString word = m_pSyntax->GetNextWord(&bIsNumber); !word.IsEmpty();
       word = m_pSyntax->GetNextWord(&bIsNumber)) {
    if (bIsNumber) {
      numbers.emplace_back(FXSYS_atoui(word.c_str()),
                           m_pSyntax->GetPos() - word.GetLength());
      if (numbers.size() > 2u)
        numbers.erase(numbers.begin());
      continue;
    }

    if (word == "(") {
      // Consume the literal string (presumably so its content is not
      // interpreted as keywords).
      m_pSyntax->ReadString();
    } else if (word == "<") {
      m_pSyntax->ReadHexString();
    } else if (word == "trailer") {
      RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
      if (pTrailer) {
        // If the trailer parsed as a stream, a copy of its dictionary is
        // used as the trailer instead.
        cross_ref_table = CPDF_CrossRefTable::MergeUp(
            std::move(cross_ref_table),
            pdfium::MakeUnique<CPDF_CrossRefTable>(ToDictionary(
                pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
                                     : std::move(pTrailer))));
      }
    } else if (word == "obj" && numbers.size() == 2u) {
      // The two preceding numbers are the object and generation numbers, and
      // the object starts where the first of them starts.
      const FX_FILESIZE obj_pos = numbers[0].second;
      const uint32_t obj_num = numbers[0].first;
      const uint32_t gen_num = numbers[1].first;

      m_pSyntax->SetPos(obj_pos);
      const RetainPtr<CPDF_Stream> pStream =
          ToStream(m_pSyntax->GetIndirectObject(
              nullptr, CPDF_SyntaxParser::ParseType::kStrict));

      // The dictionary of a cross reference stream doubles as a trailer.
      if (pStream && pStream->GetDict()->GetStringFor("Type") == "XRef") {
        cross_ref_table = CPDF_CrossRefTable::MergeUp(
            std::move(cross_ref_table),
            pdfium::MakeUnique<CPDF_CrossRefTable>(
                ToDictionary(pStream->GetDict()->Clone())));
      }

      if (obj_num < kMaxObjectNumber) {
        cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
        // If the object is an object stream, also register the objects it
        // contains as compressed objects stored inside |obj_num|.
        if (const auto object_stream =
                CPDF_ObjectStream::Create(pStream.Get())) {
          for (const auto& it : object_stream->objects_offsets()) {
            if (it.first < kMaxObjectNumber)
              cross_ref_table->AddCompressed(it.first, obj_num);
          }
        }
      }
    }
    // Any non-numeric word breaks the run of numbers.
    numbers.clear();
  }

  m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
                                                std::move(cross_ref_table));
  // Restore default buffer size.
  m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);

  return GetTrailer() && !m_CrossRefTable->objects_info().empty();
}
654
// Loads the cross reference stream located at |*pos|.
//
// The stream dictionary becomes the trailer: it replaces the current table
// when |bMainXRef| is true and is merged with it otherwise. The entries
// encoded in the stream data are then added to the table. Once the stream has
// been parsed, |*pos| is overwritten with the value of its /Prev entry.
// Returns false when the object at |*pos| is not a usable cross reference
// stream or when a compressed entry refers to an invalid object number.
bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
  RetainPtr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0));
  if (!pObject || !pObject->GetObjNum())
    return false;

  CPDF_Stream* pStream = pObject->AsStream();
  if (!pStream)
    return false;

  CPDF_Dictionary* pDict = pStream->GetDict();
  *pos = pDict->GetIntegerFor("Prev");
  int32_t size = pDict->GetIntegerFor("Size");
  if (size < 0)
    return false;

  RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
  if (bMainXRef) {
    m_CrossRefTable =
        pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pNewTrailer));
    m_CrossRefTable->ShrinkObjectMap(size);
  } else {
    m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
        pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pNewTrailer)),
        std::move(m_CrossRefTable));
  }

  // Collect the (first object number, entry count) pairs from /Index. Pairs
  // that are not numeric, have a negative start or a non-positive count are
  // dropped.
  std::vector<std::pair<int32_t, int32_t>> arrIndex;
  CPDF_Array* pArray = pDict->GetArrayFor("Index");
  if (pArray) {
    for (size_t i = 0; i < pArray->size() / 2; i++) {
      CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
      CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);

      if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
        int nStartNum = pStartNumObj->GetInteger();
        int nCount = pCountObj->GetInteger();
        if (nStartNum >= 0 && nCount > 0)
          arrIndex.push_back(std::make_pair(nStartNum, nCount));
      }
    }
  }

  // Without a usable /Index, treat the stream as one segment covering object
  // numbers 0 .. size - 1.
  if (arrIndex.empty())
    arrIndex.push_back(std::make_pair(0, size));

  // /W lists the byte width of each field of an entry; at least three fields
  // are required and their sum is the size of one entry.
  pArray = pDict->GetArrayFor("W");
  if (!pArray)
    return false;

  std::vector<uint32_t> WidthArray;
  FX_SAFE_UINT32 dwAccWidth = 0;
  for (size_t i = 0; i < pArray->size(); ++i) {
    WidthArray.push_back(pArray->GetIntegerAt(i));
    dwAccWidth += WidthArray[i];
  }

  if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
    return false;

  uint32_t totalWidth = dwAccWidth.ValueOrDie();
  auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
  pAcc->LoadAllDataFiltered();

  const uint8_t* pData = pAcc->GetData();
  uint32_t dwTotalSize = pAcc->GetSize();
  // Number of entries of the stream data consumed by the segments so far.
  uint32_t segindex = 0;
  for (const auto& index : arrIndex) {
    const int32_t startnum = index.first;
    if (startnum < 0)
      continue;

    // Skip segments whose entries would extend past the end of the data.
    uint32_t count = pdfium::base::checked_cast<uint32_t>(index.second);
    FX_SAFE_UINT32 dwCaculatedSize = segindex;
    dwCaculatedSize += count;
    dwCaculatedSize *= totalWidth;
    if (!dwCaculatedSize.IsValid() ||
        dwCaculatedSize.ValueOrDie() > dwTotalSize) {
      continue;
    }

    // Skip segments that describe object numbers beyond the current table.
    const uint8_t* segstart = pData + segindex * totalWidth;
    FX_SAFE_UINT32 dwMaxObjNum = startnum;
    dwMaxObjNum += count;
    uint32_t dwV5Size =
        m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
    if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
      continue;

    for (uint32_t i = 0; i < count; i++) {
      // When the type field has zero width, the entry is treated as a
      // regular (not compressed) object.
      ObjectType type = ObjectType::kNotCompressed;
      const uint8_t* entrystart = segstart + i * totalWidth;
      if (WidthArray[0]) {
        const uint32_t cross_ref_stream_obj_type =
            GetVarInt(entrystart, WidthArray[0]);
        type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
        // Unrecognized entry types are ignored.
        if (type == ObjectType::kNull)
          continue;
      }

      const uint32_t objnum = startnum + i;
      if (objnum >= CPDF_Parser::kMaxObjectNumber)
        continue;

      const ObjectType existing_type = GetObjectType(objnum);
      if (existing_type == ObjectType::kNull) {
        uint32_t offset = GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
        if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
          m_CrossRefTable->AddNormal(objnum, 0, offset);
        continue;
      }

      // Only objects currently reported as free are filled in from here on;
      // every other existing entry is left untouched.
      if (existing_type != ObjectType::kFree)
        continue;

      if (type == ObjectType::kFree) {
        m_CrossRefTable->SetFree(objnum);
        continue;
      }

      // The second field is a file offset for regular objects and the number
      // of the containing object stream for compressed objects.
      const uint32_t entry_value =
          GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
      if (type == ObjectType::kNotCompressed) {
        const uint32_t offset = entry_value;
        if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
          m_CrossRefTable->AddNormal(objnum, 0, offset);
        continue;
      }

      ASSERT(type == ObjectType::kCompressed);
      const uint32_t archive_obj_num = entry_value;
      if (!IsValidObjectNumber(archive_obj_num))
        return false;

      m_CrossRefTable->AddCompressed(objnum, archive_obj_num);
    }
    segindex += count;
  }
  return true;
}
794
GetIDArray() const795 const CPDF_Array* CPDF_Parser::GetIDArray() const {
796 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
797 }
798
GetRoot() const799 CPDF_Dictionary* CPDF_Parser::GetRoot() const {
800 CPDF_Object* obj =
801 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
802 return obj ? obj->GetDict() : nullptr;
803 }
804
GetEncryptDict() const805 const CPDF_Dictionary* CPDF_Parser::GetEncryptDict() const {
806 if (!GetTrailer())
807 return nullptr;
808
809 const CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt");
810 if (!pEncryptObj)
811 return nullptr;
812
813 if (pEncryptObj->IsDictionary())
814 return ToDictionary(pEncryptObj);
815
816 if (pEncryptObj->IsReference()) {
817 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
818 pEncryptObj->AsReference()->GetRefObjNum()));
819 }
820 return nullptr;
821 }
822
GetEncodedPassword() const823 ByteString CPDF_Parser::GetEncodedPassword() const {
824 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
825 }
826
// Returns the trailer dictionary held by the current cross reference table.
// Other code in this file treats a null result as "no trailer".
const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
  return m_CrossRefTable->trailer();
}
830
// Test-only accessor that exposes the cross reference table's trailer as a
// mutable dictionary.
CPDF_Dictionary* CPDF_Parser::GetMutableTrailerForTesting() {
  return m_CrossRefTable->GetMutableTrailerForTesting();
}
834
GetCombinedTrailer() const835 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
836 return m_CrossRefTable->trailer()
837 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
838 : RetainPtr<CPDF_Dictionary>();
839 }
840
GetInfoObjNum() const841 uint32_t CPDF_Parser::GetInfoObjNum() const {
842 const CPDF_Reference* pRef =
843 ToReference(m_CrossRefTable->trailer()
844 ? m_CrossRefTable->trailer()->GetObjectFor("Info")
845 : nullptr);
846 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
847 }
848
GetRootObjNum() const849 uint32_t CPDF_Parser::GetRootObjNum() const {
850 const CPDF_Reference* pRef =
851 ToReference(m_CrossRefTable->trailer()
852 ? m_CrossRefTable->trailer()->GetObjectFor("Root")
853 : nullptr);
854 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
855 }
856
ParseIndirectObject(uint32_t objnum)857 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObject(uint32_t objnum) {
858 if (!IsValidObjectNumber(objnum))
859 return nullptr;
860
861 // Prevent circular parsing the same object.
862 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
863 return nullptr;
864
865 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
866 if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
867 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
868 if (pos <= 0)
869 return nullptr;
870 return ParseIndirectObjectAt(pos, objnum);
871 }
872 if (GetObjectType(objnum) != ObjectType::kCompressed)
873 return nullptr;
874
875 const CPDF_ObjectStream* pObjStream =
876 GetObjectStream(m_CrossRefTable->GetObjectInfo(objnum)->archive_obj_num);
877 if (!pObjStream)
878 return nullptr;
879
880 return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum);
881 }
882
GetObjectStream(uint32_t object_number)883 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
884 // Prevent circular parsing the same object.
885 if (pdfium::ContainsKey(m_ParsingObjNums, object_number))
886 return nullptr;
887
888 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums,
889 object_number);
890
891 auto it = m_ObjectStreamMap.find(object_number);
892 if (it != m_ObjectStreamMap.end())
893 return it->second.get();
894
895 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
896 if (!info || info->type != ObjectType::kObjStream)
897 return nullptr;
898
899 const FX_FILESIZE object_pos = info->pos;
900 if (object_pos <= 0)
901 return nullptr;
902
903 RetainPtr<CPDF_Object> object =
904 ParseIndirectObjectAt(object_pos, object_number);
905 if (!object)
906 return nullptr;
907
908 std::unique_ptr<CPDF_ObjectStream> objs_stream =
909 CPDF_ObjectStream::Create(ToStream(object.Get()));
910 const CPDF_ObjectStream* result = objs_stream.get();
911 m_ObjectStreamMap[object_number] = std::move(objs_stream);
912
913 return result;
914 }
915
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum)916 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
917 uint32_t objnum) {
918 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
919 m_pSyntax->SetPos(pos);
920
921 auto result = m_pSyntax->GetIndirectObject(
922 m_pObjectsHolder.Get(), CPDF_SyntaxParser::ParseType::kLoose);
923 m_pSyntax->SetPos(saved_pos);
924 if (result && objnum && result->GetObjNum() != objnum)
925 return nullptr;
926
927 const bool should_decrypt = m_pSecurityHandler &&
928 m_pSecurityHandler->GetCryptoHandler() &&
929 objnum != m_MetadataObjnum;
930 if (should_decrypt &&
931 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
932 return nullptr;
933 }
934 return result;
935 }
936
GetFirstPageNo() const937 uint32_t CPDF_Parser::GetFirstPageNo() const {
938 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
939 }
940
// Takes ownership of |pLinearized| and installs it as the parser's
// linearization header, replacing any previous one.
void CPDF_Parser::SetLinearizedHeader(
    std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
  m_pLinearized = std::move(pLinearized);
}
945
LoadTrailerV4()946 RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
947 if (m_pSyntax->GetKeyword() != "trailer")
948 return nullptr;
949
950 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder.Get()));
951 }
952
GetPermissions() const953 uint32_t CPDF_Parser::GetPermissions() const {
954 return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF;
955 }
956
// Asks CPDF_LinearizedHeader to parse a linearization header using the
// parser's syntax parser and returns the result. StartLinearizedParse()
// treats a null result as "not linearized".
std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
  return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
}
960
StartLinearizedParse(const RetainPtr<CPDF_ReadValidator> & validator,const char * password)961 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
962 const RetainPtr<CPDF_ReadValidator>& validator,
963 const char* password) {
964 ASSERT(!m_bHasParsed);
965 ASSERT(!m_bXRefTableRebuilt);
966 SetPassword(password);
967 m_bXRefStream = false;
968 m_LastXRefOffset = 0;
969
970 if (!InitSyntaxParser(validator))
971 return FORMAT_ERROR;
972
973 m_pLinearized = ParseLinearizedHeader();
974 if (!m_pLinearized)
975 return StartParseInternal();
976
977 m_bHasParsed = true;
978
979 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
980 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
981 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
982 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
983 if (!RebuildCrossRef())
984 return FORMAT_ERROR;
985
986 m_bXRefTableRebuilt = true;
987 m_LastXRefOffset = 0;
988 }
989 if (bLoadV4) {
990 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
991 if (!trailer)
992 return SUCCESS;
993
994 m_CrossRefTable->SetTrailer(std::move(trailer));
995 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
996 if (xrefsize > 0)
997 ShrinkObjectMap(xrefsize);
998 }
999
1000 Error eRet = SetEncryptHandler();
1001 if (eRet != SUCCESS)
1002 return eRet;
1003
1004 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1005 if (m_bXRefTableRebuilt)
1006 return FORMAT_ERROR;
1007
1008 ReleaseEncryptHandler();
1009 if (!RebuildCrossRef())
1010 return FORMAT_ERROR;
1011
1012 eRet = SetEncryptHandler();
1013 if (eRet != SUCCESS)
1014 return eRet;
1015
1016 m_pObjectsHolder->TryInit();
1017 if (!GetRoot())
1018 return FORMAT_ERROR;
1019 }
1020
1021 if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
1022 ReleaseEncryptHandler();
1023 if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
1024 return FORMAT_ERROR;
1025
1026 eRet = SetEncryptHandler();
1027 if (eRet != SUCCESS)
1028 return eRet;
1029 }
1030
1031 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1032 if (CPDF_Reference* pMetadata =
1033 ToReference(GetRoot()->GetObjectFor("Metadata")))
1034 m_MetadataObjnum = pMetadata->GetRefObjNum();
1035 }
1036 return SUCCESS;
1037 }
1038
LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset)1039 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1040 FX_FILESIZE xref_offset = main_xref_offset;
1041 if (!LoadCrossRefV5(&xref_offset, false))
1042 return false;
1043
1044 std::set<FX_FILESIZE> seen_xref_offset;
1045 while (xref_offset) {
1046 seen_xref_offset.insert(xref_offset);
1047 if (!LoadCrossRefV5(&xref_offset, false))
1048 return false;
1049
1050 // Check for circular references.
1051 if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
1052 return false;
1053 }
1054 m_ObjectStreamMap.clear();
1055 m_bXRefStream = true;
1056 return true;
1057 }
1058
LoadLinearizedMainXRefTable()1059 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1060 const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1061 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1062 if (main_xref_offset < 0)
1063 return FORMAT_ERROR;
1064
1065 if (main_xref_offset == 0)
1066 return SUCCESS;
1067
1068 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1069 m_MetadataObjnum = 0;
1070 m_ObjectStreamMap.clear();
1071
1072 if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1073 !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1074 m_LastXRefOffset = 0;
1075 return FORMAT_ERROR;
1076 }
1077
1078 return SUCCESS;
1079 }
1080
GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type) const1081 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType(
1082 uint32_t cross_ref_stream_type) const {
1083 switch (cross_ref_stream_type) {
1084 case 0:
1085 return CPDF_Parser::ObjectType::kFree;
1086 case 1:
1087 return CPDF_Parser::ObjectType::kNotCompressed;
1088 case 2:
1089 return CPDF_Parser::ObjectType::kCompressed;
1090 default:
1091 return CPDF_Parser::ObjectType::kNull;
1092 }
1093 }
1094