1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8
9 #include <ctype.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13 #include <utility>
14 #include <vector>
15
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_object_stream.h"
23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
24 #include "core/fpdfapi/parser/cpdf_reference.h"
25 #include "core/fpdfapi/parser/cpdf_security_handler.h"
26 #include "core/fpdfapi/parser/cpdf_stream.h"
27 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
28 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
29 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
30 #include "core/fxcrt/autorestorer.h"
31 #include "core/fxcrt/data_vector.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "core/fxcrt/fx_safe_types.h"
34 #include "core/fxcrt/scoped_set_insertion.h"
35 #include "third_party/base/check.h"
36 #include "third_party/base/check_op.h"
37 #include "third_party/base/containers/contains.h"
38 #include "third_party/base/notreached.h"
39 #include "third_party/base/span.h"
40
41 namespace {
42
43 // A limit on the size of the xref table. Theoretical limits are higher, but
44 // this may be large enough in practice.
45 const int32_t kMaxXRefSize = 1048576;
46
47 // "%PDF-1.7\n"
48 constexpr FX_FILESIZE kPDFHeaderSize = 9;
49
50 // The required number of fields in a /W array in a cross-reference stream
51 // dictionary.
52 constexpr size_t kMinFieldCount = 3;
53
54 // V4 trailers are inline.
55 constexpr uint32_t kNoV4TrailerObjectNumber = 0;
56
// One (first object number, object count) pair describing a run of
// consecutive object numbers covered by a cross-reference stream.
struct CrossRefV5IndexEntry {
  uint32_t start_obj_num;  // First object number in the run.
  uint32_t obj_count;      // Number of objects in the run.
};
61
GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type)62 CPDF_Parser::ObjectType GetObjectTypeFromCrossRefStreamType(
63 uint32_t cross_ref_stream_type) {
64 switch (cross_ref_stream_type) {
65 case 0:
66 return CPDF_Parser::ObjectType::kFree;
67 case 1:
68 return CPDF_Parser::ObjectType::kNotCompressed;
69 case 2:
70 return CPDF_Parser::ObjectType::kCompressed;
71 default:
72 return CPDF_Parser::ObjectType::kNull;
73 }
74 }
75
76 // Use the Get*XRefStreamEntry() functions below, instead of calling this
77 // directly.
GetVarInt(pdfium::span<const uint8_t> input)78 uint32_t GetVarInt(pdfium::span<const uint8_t> input) {
79 uint32_t result = 0;
80 for (uint8_t c : input)
81 result = result * 256 + c;
82 return result;
83 }
84
85 // The following 3 functions retrieve variable length entries from
86 // cross-reference streams, as described in ISO 32000-1:2008 table 18. There are
87 // only 3 fields for any given entry.
GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)88 uint32_t GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
89 pdfium::span<const uint32_t> field_widths) {
90 return GetVarInt(entry_span.first(field_widths[0]));
91 }
92
GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)93 uint32_t GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
94 pdfium::span<const uint32_t> field_widths) {
95 return GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
96 }
97
GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)98 uint32_t GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
99 pdfium::span<const uint32_t> field_widths) {
100 return GetVarInt(
101 entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
102 }
103
GetCrossRefV5Indices(const CPDF_Array * array,uint32_t size)104 std::vector<CrossRefV5IndexEntry> GetCrossRefV5Indices(const CPDF_Array* array,
105 uint32_t size) {
106 std::vector<CrossRefV5IndexEntry> indices;
107 if (array) {
108 for (size_t i = 0; i < array->size() / 2; i++) {
109 RetainPtr<const CPDF_Number> pStartNumObj = array->GetNumberAt(i * 2);
110 if (!pStartNumObj)
111 continue;
112
113 RetainPtr<const CPDF_Number> pCountObj = array->GetNumberAt(i * 2 + 1);
114 if (!pCountObj)
115 continue;
116
117 int nStartNum = pStartNumObj->GetInteger();
118 int nCount = pCountObj->GetInteger();
119 if (nStartNum < 0 || nCount <= 0)
120 continue;
121
122 indices.push_back(
123 {static_cast<uint32_t>(nStartNum), static_cast<uint32_t>(nCount)});
124 }
125 }
126
127 if (indices.empty())
128 indices.push_back({0, size});
129 return indices;
130 }
131
GetFieldWidths(const CPDF_Array * array)132 std::vector<uint32_t> GetFieldWidths(const CPDF_Array* array) {
133 std::vector<uint32_t> results;
134 if (!array)
135 return results;
136
137 CPDF_ArrayLocker locker(array);
138 for (const auto& obj : locker)
139 results.push_back(obj->GetInteger());
140 return results;
141 }
142
143 class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
144 public:
145 ObjectsHolderStub() = default;
146 ~ObjectsHolderStub() override = default;
TryInit()147 bool TryInit() override { return true; }
148 };
149
150 } // namespace
151
CPDF_Parser(ParsedObjectsHolder * holder)152 CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder)
153 : m_pObjectsHolder(holder),
154 m_CrossRefTable(std::make_unique<CPDF_CrossRefTable>()) {
155 if (!holder) {
156 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
157 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
158 }
159 }
160
CPDF_Parser()161 CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {}
162
163 CPDF_Parser::~CPDF_Parser() = default;
164
GetLastObjNum() const165 uint32_t CPDF_Parser::GetLastObjNum() const {
166 return m_CrossRefTable->objects_info().empty()
167 ? 0
168 : m_CrossRefTable->objects_info().rbegin()->first;
169 }
170
IsValidObjectNumber(uint32_t objnum) const171 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
172 return objnum <= GetLastObjNum();
173 }
174
GetObjectPositionOrZero(uint32_t objnum) const175 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
176 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
177 return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
178 }
179
GetObjectType(uint32_t objnum) const180 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
181 DCHECK(IsValidObjectNumber(objnum));
182 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
183 return info ? info->type : ObjectType::kFree;
184 }
185
IsObjectFreeOrNull(uint32_t objnum) const186 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
187 switch (GetObjectType(objnum)) {
188 case ObjectType::kFree:
189 case ObjectType::kNull:
190 return true;
191 case ObjectType::kNotCompressed:
192 case ObjectType::kCompressed:
193 return false;
194 }
195 NOTREACHED();
196 return false;
197 }
198
IsObjectFree(uint32_t objnum) const199 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
200 return GetObjectType(objnum) == ObjectType::kFree;
201 }
202
ShrinkObjectMap(uint32_t size)203 void CPDF_Parser::ShrinkObjectMap(uint32_t size) {
204 m_CrossRefTable->ShrinkObjectMap(size);
205 }
206
InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator)207 bool CPDF_Parser::InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator) {
208 const absl::optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
209 if (!header_offset.has_value())
210 return false;
211 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
212 return false;
213
214 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
215 header_offset.value());
216 return ParseFileVersion();
217 }
218
ParseFileVersion()219 bool CPDF_Parser::ParseFileVersion() {
220 m_FileVersion = 0;
221 uint8_t ch;
222 if (!m_pSyntax->GetCharAt(5, ch))
223 return false;
224
225 if (isdigit(ch))
226 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
227
228 if (!m_pSyntax->GetCharAt(7, ch))
229 return false;
230
231 if (isdigit(ch))
232 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
233 return true;
234 }
235
StartParse(RetainPtr<IFX_SeekableReadStream> pFileAccess,const ByteString & password)236 CPDF_Parser::Error CPDF_Parser::StartParse(
237 RetainPtr<IFX_SeekableReadStream> pFileAccess,
238 const ByteString& password) {
239 if (!InitSyntaxParser(pdfium::MakeRetain<CPDF_ReadValidator>(
240 std::move(pFileAccess), nullptr)))
241 return FORMAT_ERROR;
242 SetPassword(password);
243 return StartParseInternal();
244 }
245
StartParseInternal()246 CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
247 DCHECK(!m_bHasParsed);
248 DCHECK(!m_bXRefTableRebuilt);
249 m_bHasParsed = true;
250 m_bXRefStream = false;
251
252 m_LastXRefOffset = ParseStartXRef();
253 if (m_LastXRefOffset >= kPDFHeaderSize) {
254 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
255 !LoadAllCrossRefV5(m_LastXRefOffset)) {
256 if (!RebuildCrossRef())
257 return FORMAT_ERROR;
258
259 m_bXRefTableRebuilt = true;
260 m_LastXRefOffset = 0;
261 }
262 } else {
263 if (!RebuildCrossRef())
264 return FORMAT_ERROR;
265
266 m_bXRefTableRebuilt = true;
267 }
268 Error eRet = SetEncryptHandler();
269 if (eRet != SUCCESS)
270 return eRet;
271
272 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
273 if (m_bXRefTableRebuilt)
274 return FORMAT_ERROR;
275
276 ReleaseEncryptHandler();
277 if (!RebuildCrossRef())
278 return FORMAT_ERROR;
279
280 eRet = SetEncryptHandler();
281 if (eRet != SUCCESS)
282 return eRet;
283
284 m_pObjectsHolder->TryInit();
285 if (!GetRoot())
286 return FORMAT_ERROR;
287 }
288 if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
289 ReleaseEncryptHandler();
290 if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
291 return FORMAT_ERROR;
292
293 eRet = SetEncryptHandler();
294 if (eRet != SUCCESS)
295 return eRet;
296 }
297 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
298 RetainPtr<const CPDF_Reference> pMetadata =
299 ToReference(GetRoot()->GetObjectFor("Metadata"));
300 if (pMetadata)
301 m_MetadataObjnum = pMetadata->GetRefObjNum();
302 }
303 return SUCCESS;
304 }
305
ParseStartXRef()306 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
307 static constexpr char kStartXRefKeyword[] = "startxref";
308 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
309 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
310 return 0;
311
312 // Skip "startxref" keyword.
313 m_pSyntax->GetKeyword();
314
315 // Read XRef offset.
316 const CPDF_SyntaxParser::WordResult xref_offset_result =
317 m_pSyntax->GetNextWord();
318 if (!xref_offset_result.is_number || xref_offset_result.word.IsEmpty())
319 return 0;
320
321 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
322 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
323 return 0;
324
325 return result.ValueOrDie();
326 }
327
SetEncryptHandler()328 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
329 ReleaseEncryptHandler();
330 if (!GetTrailer())
331 return FORMAT_ERROR;
332
333 RetainPtr<const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
334 if (!pEncryptDict)
335 return SUCCESS;
336
337 if (pEncryptDict->GetNameFor("Filter") != "Standard")
338 return HANDLER_ERROR;
339
340 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
341 if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
342 return PASSWORD_ERROR;
343
344 m_pSecurityHandler = std::move(pSecurityHandler);
345 return SUCCESS;
346 }
347
ReleaseEncryptHandler()348 void CPDF_Parser::ReleaseEncryptHandler() {
349 m_pSecurityHandler.Reset();
350 }
351
352 // Ideally, all the cross reference entries should be verified.
353 // In reality, we rarely see well-formed cross references don't match
354 // with the objects. crbug/602650 showed a case where object numbers
355 // in the cross reference table are all off by one.
VerifyCrossRefV4()356 bool CPDF_Parser::VerifyCrossRefV4() {
357 for (const auto& it : m_CrossRefTable->objects_info()) {
358 if (it.second.pos <= 0)
359 continue;
360 // Find the first non-zero position.
361 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
362 m_pSyntax->SetPos(it.second.pos);
363 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
364 m_pSyntax->SetPos(SavedPos);
365 if (!word_result.is_number || word_result.word.IsEmpty() ||
366 FXSYS_atoui(word_result.word.c_str()) != it.first) {
367 // If the object number read doesn't match the one stored,
368 // something is wrong with the cross reference table.
369 return false;
370 }
371 break;
372 }
373 return true;
374 }
375
LoadAllCrossRefV4(FX_FILESIZE xref_offset)376 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
377 if (!LoadCrossRefV4(xref_offset, true))
378 return false;
379
380 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
381 if (!trailer)
382 return false;
383
384 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
385 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
386 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
387 ShrinkObjectMap(xrefsize);
388
389 FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
390 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
391 std::vector<FX_FILESIZE> xref_list{xref_offset};
392 std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
393
394 // When the trailer doesn't have Prev entry or Prev entry value is not
395 // numerical, GetDirectInteger() returns 0. Loading will end.
396 xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
397 while (xref_offset > 0) {
398 // Check for circular references.
399 if (pdfium::Contains(seen_xref_offset, xref_offset))
400 return false;
401
402 seen_xref_offset.insert(xref_offset);
403 xref_list.insert(xref_list.begin(), xref_offset);
404
405 // SLOW ...
406 LoadCrossRefV4(xref_offset, true);
407
408 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
409 if (!pDict)
410 return false;
411
412 xref_offset = pDict->GetDirectIntegerFor("Prev");
413 xref_stm = pDict->GetIntegerFor("XRefStm");
414 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
415
416 // SLOW ...
417 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
418 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
419 kNoV4TrailerObjectNumber),
420 std::move(m_CrossRefTable));
421 }
422
423 for (size_t i = 0; i < xref_list.size(); ++i) {
424 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
425 return false;
426
427 if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
428 return false;
429
430 if (i == 0 && !VerifyCrossRefV4())
431 return false;
432 }
433 return true;
434 }
435
LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset)436 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
437 if (!LoadCrossRefV4(main_xref_offset, false))
438 return false;
439
440 RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
441 if (!main_trailer)
442 return false;
443
444 // GetTrailer() currently returns the first-page trailer.
445 if (GetTrailer()->GetDirectIntegerFor("Size") == 0)
446 return false;
447
448 // Read /XRefStm from the first-page trailer. No need to read /Prev for the
449 // first-page trailer, as the caller already did that and passed it in as
450 // |main_xref_offset|.
451 FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
452 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
453 std::vector<FX_FILESIZE> xref_list{main_xref_offset};
454 std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
455
456 // Merge the trailers.
457 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
458 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
459 kNoV4TrailerObjectNumber),
460 std::move(m_CrossRefTable));
461
462 // Now GetTrailer() returns the merged trailer, where /Prev is from the
463 // main-trailer.
464 FX_FILESIZE xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
465 while (xref_offset > 0) {
466 // Check for circular references.
467 if (pdfium::Contains(seen_xref_offset, xref_offset))
468 return false;
469
470 seen_xref_offset.insert(xref_offset);
471 xref_list.insert(xref_list.begin(), xref_offset);
472
473 // SLOW ...
474 LoadCrossRefV4(xref_offset, true);
475
476 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
477 if (!pDict)
478 return false;
479
480 xref_offset = pDict->GetDirectIntegerFor("Prev");
481 xref_stm = pDict->GetIntegerFor("XRefStm");
482 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
483
484 // SLOW ...
485 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
486 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
487 kNoV4TrailerObjectNumber),
488 std::move(m_CrossRefTable));
489 }
490
491 if (xref_stream_list[0] > 0 && !LoadCrossRefV5(&xref_stream_list[0], false))
492 return false;
493
494 for (size_t i = 1; i < xref_list.size(); ++i) {
495 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
496 return false;
497
498 if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
499 return false;
500 }
501 return true;
502 }
503
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)504 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
505 uint32_t start_objnum,
506 uint32_t count,
507 std::vector<CrossRefObjData>* out_objects) {
508 if (!count)
509 return true;
510
511 // Each entry shall be exactly 20 byte.
512 // A sample entry looks like:
513 // "0000000000 00007 f\r\n"
514 static constexpr int32_t kEntrySize = 20;
515
516 if (!out_objects) {
517 FX_SAFE_FILESIZE pos = count;
518 pos *= kEntrySize;
519 pos += m_pSyntax->GetPos();
520 if (!pos.IsValid())
521 return false;
522 m_pSyntax->SetPos(pos.ValueOrDie());
523 return true;
524 }
525 const size_t start_obj_index = out_objects->size();
526 FX_SAFE_SIZE_T new_size = start_obj_index;
527 new_size += count;
528 if (!new_size.IsValid())
529 return false;
530
531 if (new_size.ValueOrDie() > kMaxXRefSize)
532 return false;
533
534 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
535 if (new_size.ValueOrDie() > max_entries_in_file)
536 return false;
537
538 out_objects->resize(new_size.ValueOrDie());
539
540 DataVector<char> buf(1024 * kEntrySize + 1);
541 buf.back() = '\0';
542
543 uint32_t entries_to_read = count;
544 while (entries_to_read > 0) {
545 const uint32_t entries_in_block = std::min(entries_to_read, 1024u);
546 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
547 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
548 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
549 return false;
550
551 for (uint32_t i = 0; i < entries_in_block; i++) {
552 uint32_t iObjectIndex = count - entries_to_read + i;
553 CrossRefObjData& obj_data =
554 (*out_objects)[start_obj_index + iObjectIndex];
555 const uint32_t objnum = start_objnum + iObjectIndex;
556 obj_data.obj_num = objnum;
557 ObjectInfo& info = obj_data.info;
558
559 const char* pEntry = &buf[i * kEntrySize];
560 if (pEntry[17] == 'f') {
561 info.pos = 0;
562 info.type = ObjectType::kFree;
563 } else {
564 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
565 if (!offset.IsValid())
566 return false;
567
568 if (offset.ValueOrDie() == 0) {
569 for (int32_t c = 0; c < 10; c++) {
570 if (!isdigit(pEntry[c]))
571 return false;
572 }
573 }
574
575 info.pos = offset.ValueOrDie();
576
577 // TODO(art-snake): The info.gennum is uint16_t, but version may be
578 // greated than max<uint16_t>. Needs solve this issue.
579 const int32_t version = FXSYS_atoi(pEntry + 11);
580 info.gennum = version;
581 info.type = ObjectType::kNotCompressed;
582 }
583 }
584 entries_to_read -= entries_in_block;
585 }
586 return true;
587 }
588
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)589 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
590 if (out_objects)
591 out_objects->clear();
592
593 if (m_pSyntax->GetKeyword() != "xref")
594 return false;
595 std::vector<CrossRefObjData> result_objects;
596 while (true) {
597 FX_FILESIZE saved_pos = m_pSyntax->GetPos();
598 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
599 const ByteString& word = word_result.word;
600 if (word.IsEmpty())
601 return false;
602
603 if (!word_result.is_number) {
604 m_pSyntax->SetPos(saved_pos);
605 break;
606 }
607
608 uint32_t start_objnum = FXSYS_atoui(word.c_str());
609 if (start_objnum >= kMaxObjectNumber)
610 return false;
611
612 uint32_t count = m_pSyntax->GetDirectNum();
613 m_pSyntax->ToNextWord();
614
615 if (!ParseAndAppendCrossRefSubsectionData(
616 start_objnum, count, out_objects ? &result_objects : nullptr)) {
617 return false;
618 }
619 }
620 if (out_objects)
621 *out_objects = std::move(result_objects);
622 return true;
623 }
624
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)625 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
626 m_pSyntax->SetPos(pos);
627 std::vector<CrossRefObjData> objects;
628 if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
629 return false;
630
631 MergeCrossRefObjectsData(objects);
632 return true;
633 }
634
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)635 void CPDF_Parser::MergeCrossRefObjectsData(
636 const std::vector<CrossRefObjData>& objects) {
637 for (const auto& obj : objects) {
638 switch (obj.info.type) {
639 case ObjectType::kFree:
640 if (obj.info.gennum > 0)
641 m_CrossRefTable->SetFree(obj.obj_num);
642 break;
643 case ObjectType::kNormal:
644 case ObjectType::kObjStream:
645 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
646 break;
647 case ObjectType::kCompressed:
648 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
649 obj.info.archive.obj_index);
650 break;
651 default:
652 NOTREACHED();
653 }
654 }
655 }
656
LoadAllCrossRefV5(FX_FILESIZE xref_offset)657 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
658 if (!LoadCrossRefV5(&xref_offset, true))
659 return false;
660
661 std::set<FX_FILESIZE> seen_xref_offset;
662 while (xref_offset > 0) {
663 seen_xref_offset.insert(xref_offset);
664 if (!LoadCrossRefV5(&xref_offset, false))
665 return false;
666
667 // Check for circular references.
668 if (pdfium::Contains(seen_xref_offset, xref_offset))
669 return false;
670 }
671 m_ObjectStreamMap.clear();
672 m_bXRefStream = true;
673 return true;
674 }
675
RebuildCrossRef()676 bool CPDF_Parser::RebuildCrossRef() {
677 auto cross_ref_table = std::make_unique<CPDF_CrossRefTable>();
678
679 const uint32_t kBufferSize = 4096;
680 m_pSyntax->SetReadBufferSize(kBufferSize);
681 m_pSyntax->SetPos(0);
682
683 std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
684 for (CPDF_SyntaxParser::WordResult result = m_pSyntax->GetNextWord();
685 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
686 const ByteString& word = result.word;
687 if (result.is_number) {
688 numbers.emplace_back(FXSYS_atoui(word.c_str()),
689 m_pSyntax->GetPos() - word.GetLength());
690 if (numbers.size() > 2u)
691 numbers.erase(numbers.begin());
692 continue;
693 }
694
695 if (word == "(") {
696 m_pSyntax->ReadString();
697 } else if (word == "<") {
698 m_pSyntax->ReadHexString();
699 } else if (word == "trailer") {
700 RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
701 if (pTrailer) {
702 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
703 // Grab the object number from `pTrailer` before potentially calling
704 // std::move(pTrailer) below.
705 const uint32_t trailer_object_number = pTrailer->GetObjNum();
706 RetainPtr<CPDF_Dictionary> trailer_dict =
707 stream_trailer ? stream_trailer->GetMutableDict()
708 : ToDictionary(std::move(pTrailer));
709 cross_ref_table = CPDF_CrossRefTable::MergeUp(
710 std::move(cross_ref_table),
711 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
712 trailer_object_number));
713 }
714 } else if (word == "obj" && numbers.size() == 2u) {
715 const FX_FILESIZE obj_pos = numbers[0].second;
716 const uint32_t obj_num = numbers[0].first;
717 const uint32_t gen_num = numbers[1].first;
718
719 m_pSyntax->SetPos(obj_pos);
720 const RetainPtr<CPDF_Stream> pStream =
721 ToStream(m_pSyntax->GetIndirectObject(
722 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
723
724 if (pStream && pStream->GetDict()->GetNameFor("Type") == "XRef") {
725 cross_ref_table = CPDF_CrossRefTable::MergeUp(
726 std::move(cross_ref_table),
727 std::make_unique<CPDF_CrossRefTable>(
728 ToDictionary(pStream->GetDict()->Clone()),
729 pStream->GetObjNum()));
730 }
731
732 if (obj_num < kMaxObjectNumber) {
733 cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
734 const auto object_stream =
735 CPDF_ObjectStream::Create(std::move(pStream));
736 if (object_stream) {
737 const auto& object_info = object_stream->object_info();
738 for (size_t i = 0; i < object_info.size(); ++i) {
739 const auto& info = object_info[i];
740 if (info.obj_num < kMaxObjectNumber)
741 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
742 }
743 }
744 }
745 }
746 numbers.clear();
747 }
748
749 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
750 std::move(cross_ref_table));
751 // Resore default buffer size.
752 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
753
754 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
755 }
756
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)757 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
758 RetainPtr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0));
759 if (!pObject || !pObject->GetObjNum())
760 return false;
761
762 RetainPtr<const CPDF_Stream> pStream(pObject->AsStream());
763 if (!pStream)
764 return false;
765
766 RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
767 int32_t prev = pDict->GetIntegerFor("Prev");
768 if (prev < 0)
769 return false;
770
771 int32_t size = pDict->GetIntegerFor("Size");
772 if (size < 0)
773 return false;
774
775 *pos = prev;
776
777 RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
778 if (bMainXRef) {
779 m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
780 std::move(pNewTrailer), pStream->GetObjNum());
781 m_CrossRefTable->ShrinkObjectMap(size);
782 } else {
783 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
784 std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
785 pStream->GetObjNum()),
786 std::move(m_CrossRefTable));
787 }
788
789 std::vector<CrossRefV5IndexEntry> indices =
790 GetCrossRefV5Indices(pDict->GetArrayFor("Index").Get(), size);
791
792 std::vector<uint32_t> field_widths =
793 GetFieldWidths(pDict->GetArrayFor("W").Get());
794 if (field_widths.size() < kMinFieldCount)
795 return false;
796
797 FX_SAFE_UINT32 dwAccWidth;
798 for (uint32_t width : field_widths)
799 dwAccWidth += width;
800 if (!dwAccWidth.IsValid())
801 return false;
802
803 uint32_t total_width = dwAccWidth.ValueOrDie();
804 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
805 pAcc->LoadAllDataFiltered();
806
807 pdfium::span<const uint8_t> data_span = pAcc->GetSpan();
808 uint32_t segindex = 0;
809 for (const auto& index : indices) {
810 FX_SAFE_UINT32 seg_end = segindex;
811 seg_end += index.obj_count;
812 seg_end *= total_width;
813 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
814 continue;
815
816 pdfium::span<const uint8_t> seg_span = data_span.subspan(
817 segindex * total_width, index.obj_count * total_width);
818 FX_SAFE_UINT32 dwMaxObjNum = index.start_obj_num;
819 dwMaxObjNum += index.obj_count;
820 uint32_t dwV5Size =
821 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
822 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
823 continue;
824
825 for (uint32_t i = 0; i < index.obj_count; ++i) {
826 const uint32_t obj_num = index.start_obj_num + i;
827 if (obj_num >= CPDF_Parser::kMaxObjectNumber)
828 break;
829
830 ProcessCrossRefV5Entry(seg_span.subspan(i * total_width, total_width),
831 field_widths, obj_num);
832 }
833
834 segindex += index.obj_count;
835 }
836 return true;
837 }
838
ProcessCrossRefV5Entry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths,uint32_t obj_num)839 void CPDF_Parser::ProcessCrossRefV5Entry(
840 pdfium::span<const uint8_t> entry_span,
841 pdfium::span<const uint32_t> field_widths,
842 uint32_t obj_num) {
843 DCHECK_GE(field_widths.size(), kMinFieldCount);
844 ObjectType type = ObjectType::kNotCompressed;
845 if (field_widths[0]) {
846 const uint32_t cross_ref_stream_obj_type =
847 GetFirstXRefStreamEntry(entry_span, field_widths);
848 type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
849 if (type == ObjectType::kNull)
850 return;
851 }
852
853 const ObjectType existing_type = GetObjectType(obj_num);
854 if (existing_type == ObjectType::kNull) {
855 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
856 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
857 m_CrossRefTable->AddNormal(obj_num, 0, offset);
858 return;
859 }
860
861 if (existing_type != ObjectType::kFree)
862 return;
863
864 if (type == ObjectType::kFree) {
865 m_CrossRefTable->SetFree(obj_num);
866 return;
867 }
868
869 if (type == ObjectType::kNotCompressed) {
870 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
871 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
872 m_CrossRefTable->AddNormal(obj_num, 0, offset);
873 return;
874 }
875
876 DCHECK_EQ(type, ObjectType::kCompressed);
877 const uint32_t archive_obj_num =
878 GetSecondXRefStreamEntry(entry_span, field_widths);
879 if (!IsValidObjectNumber(archive_obj_num)) {
880 return;
881 }
882
883 const uint32_t archive_obj_index =
884 GetThirdXRefStreamEntry(entry_span, field_widths);
885 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
886 }
887
GetIDArray() const888 RetainPtr<const CPDF_Array> CPDF_Parser::GetIDArray() const {
889 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
890 }
891
GetRoot() const892 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetRoot() const {
893 RetainPtr<CPDF_Object> obj =
894 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
895 return obj ? obj->GetDict() : nullptr;
896 }
897
GetEncryptDict() const898 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetEncryptDict() const {
899 if (!GetTrailer())
900 return nullptr;
901
902 RetainPtr<const CPDF_Object> pEncryptObj =
903 GetTrailer()->GetObjectFor("Encrypt");
904 if (!pEncryptObj)
905 return nullptr;
906
907 if (pEncryptObj->IsDictionary())
908 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
909
910 if (pEncryptObj->IsReference()) {
911 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
912 pEncryptObj->AsReference()->GetRefObjNum()));
913 }
914 return nullptr;
915 }
916
GetEncodedPassword() const917 ByteString CPDF_Parser::GetEncodedPassword() const {
918 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
919 }
920
GetTrailer() const921 const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
922 return m_CrossRefTable->trailer();
923 }
924
GetMutableTrailerForTesting()925 CPDF_Dictionary* CPDF_Parser::GetMutableTrailerForTesting() {
926 return m_CrossRefTable->GetMutableTrailerForTesting();
927 }
928
GetTrailerObjectNumber() const929 uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
930 return m_CrossRefTable->trailer_object_number();
931 }
932
GetCombinedTrailer() const933 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
934 return m_CrossRefTable->trailer()
935 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
936 : RetainPtr<CPDF_Dictionary>();
937 }
938
GetInfoObjNum() const939 uint32_t CPDF_Parser::GetInfoObjNum() const {
940 RetainPtr<const CPDF_Reference> pRef =
941 ToReference(m_CrossRefTable->trailer()
942 ? m_CrossRefTable->trailer()->GetObjectFor("Info")
943 : nullptr);
944 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
945 }
946
GetRootObjNum() const947 uint32_t CPDF_Parser::GetRootObjNum() const {
948 RetainPtr<const CPDF_Reference> pRef =
949 ToReference(m_CrossRefTable->trailer()
950 ? m_CrossRefTable->trailer()->GetObjectFor("Root")
951 : nullptr);
952 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
953 }
954
ParseIndirectObject(uint32_t objnum)955 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObject(uint32_t objnum) {
956 if (!IsValidObjectNumber(objnum))
957 return nullptr;
958
959 // Prevent circular parsing the same object.
960 if (pdfium::Contains(m_ParsingObjNums, objnum))
961 return nullptr;
962
963 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
964 if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
965 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
966 if (pos <= 0)
967 return nullptr;
968 return ParseIndirectObjectAt(pos, objnum);
969 }
970 if (GetObjectType(objnum) != ObjectType::kCompressed)
971 return nullptr;
972
973 const ObjectInfo& info = *m_CrossRefTable->GetObjectInfo(objnum);
974 const CPDF_ObjectStream* pObjStream = GetObjectStream(info.archive.obj_num);
975 if (!pObjStream)
976 return nullptr;
977
978 return pObjStream->ParseObject(m_pObjectsHolder, objnum,
979 info.archive.obj_index);
980 }
981
GetObjectStream(uint32_t object_number)982 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
983 // Prevent circular parsing the same object.
984 if (pdfium::Contains(m_ParsingObjNums, object_number))
985 return nullptr;
986
987 auto it = m_ObjectStreamMap.find(object_number);
988 if (it != m_ObjectStreamMap.end())
989 return it->second.get();
990
991 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
992 if (!info || info->type != ObjectType::kObjStream)
993 return nullptr;
994
995 const FX_FILESIZE object_pos = info->pos;
996 if (object_pos <= 0)
997 return nullptr;
998
999 // Keep track of `object_number` before doing more parsing.
1000 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, object_number);
1001
1002 RetainPtr<CPDF_Object> object =
1003 ParseIndirectObjectAt(object_pos, object_number);
1004 if (!object)
1005 return nullptr;
1006
1007 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1008 CPDF_ObjectStream::Create(ToStream(object));
1009 const CPDF_ObjectStream* result = objs_stream.get();
1010 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1011
1012 return result;
1013 }
1014
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum)1015 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
1016 uint32_t objnum) {
1017 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1018 m_pSyntax->SetPos(pos);
1019
1020 auto result = m_pSyntax->GetIndirectObject(
1021 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1022 m_pSyntax->SetPos(saved_pos);
1023 if (result && objnum && result->GetObjNum() != objnum)
1024 return nullptr;
1025
1026 const bool should_decrypt = m_pSecurityHandler &&
1027 m_pSecurityHandler->GetCryptoHandler() &&
1028 objnum != m_MetadataObjnum;
1029 if (should_decrypt &&
1030 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1031 return nullptr;
1032 }
1033 return result;
1034 }
1035
GetDocumentSize() const1036 FX_FILESIZE CPDF_Parser::GetDocumentSize() const {
1037 return m_pSyntax->GetDocumentSize();
1038 }
1039
GetFirstPageNo() const1040 uint32_t CPDF_Parser::GetFirstPageNo() const {
1041 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1042 }
1043
SetLinearizedHeaderForTesting(std::unique_ptr<CPDF_LinearizedHeader> pLinearized)1044 void CPDF_Parser::SetLinearizedHeaderForTesting(
1045 std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1046 m_pLinearized = std::move(pLinearized);
1047 }
1048
LoadTrailerV4()1049 RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1050 if (m_pSyntax->GetKeyword() != "trailer")
1051 return nullptr;
1052
1053 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1054 }
1055
GetPermissions() const1056 uint32_t CPDF_Parser::GetPermissions() const {
1057 return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF;
1058 }
1059
ParseLinearizedHeader()1060 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
1061 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1062 }
1063
StartLinearizedParse(RetainPtr<CPDF_ReadValidator> validator,const ByteString & password)1064 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1065 RetainPtr<CPDF_ReadValidator> validator,
1066 const ByteString& password) {
1067 DCHECK(!m_bHasParsed);
1068 DCHECK(!m_bXRefTableRebuilt);
1069 SetPassword(password);
1070 m_bXRefStream = false;
1071 m_LastXRefOffset = 0;
1072
1073 if (!InitSyntaxParser(std::move(validator)))
1074 return FORMAT_ERROR;
1075
1076 m_pLinearized = ParseLinearizedHeader();
1077 if (!m_pLinearized)
1078 return StartParseInternal();
1079
1080 m_bHasParsed = true;
1081
1082 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1083 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1084 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
1085 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1086 if (!RebuildCrossRef())
1087 return FORMAT_ERROR;
1088
1089 m_bXRefTableRebuilt = true;
1090 m_LastXRefOffset = 0;
1091 }
1092 if (bLoadV4) {
1093 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
1094 if (!trailer)
1095 return SUCCESS;
1096
1097 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
1098 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
1099 if (xrefsize > 0) {
1100 // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
1101 // the xref table.
1102 const uint32_t expected_last_obj_num = xrefsize - 1;
1103 if (GetLastObjNum() != expected_last_obj_num && !RebuildCrossRef()) {
1104 return FORMAT_ERROR;
1105 }
1106 }
1107 }
1108
1109 Error eRet = SetEncryptHandler();
1110 if (eRet != SUCCESS)
1111 return eRet;
1112
1113 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1114 if (m_bXRefTableRebuilt)
1115 return FORMAT_ERROR;
1116
1117 ReleaseEncryptHandler();
1118 if (!RebuildCrossRef())
1119 return FORMAT_ERROR;
1120
1121 eRet = SetEncryptHandler();
1122 if (eRet != SUCCESS)
1123 return eRet;
1124
1125 m_pObjectsHolder->TryInit();
1126 if (!GetRoot())
1127 return FORMAT_ERROR;
1128 }
1129
1130 if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
1131 ReleaseEncryptHandler();
1132 if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
1133 return FORMAT_ERROR;
1134
1135 eRet = SetEncryptHandler();
1136 if (eRet != SUCCESS)
1137 return eRet;
1138 }
1139
1140 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1141 RetainPtr<const CPDF_Reference> pMetadata =
1142 ToReference(GetRoot()->GetObjectFor("Metadata"));
1143 if (pMetadata)
1144 m_MetadataObjnum = pMetadata->GetRefObjNum();
1145 }
1146 return SUCCESS;
1147 }
1148
LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset)1149 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1150 FX_FILESIZE xref_offset = main_xref_offset;
1151 if (!LoadCrossRefV5(&xref_offset, false))
1152 return false;
1153
1154 std::set<FX_FILESIZE> seen_xref_offset;
1155 while (xref_offset) {
1156 seen_xref_offset.insert(xref_offset);
1157 if (!LoadCrossRefV5(&xref_offset, false))
1158 return false;
1159
1160 // Check for circular references.
1161 if (pdfium::Contains(seen_xref_offset, xref_offset))
1162 return false;
1163 }
1164 m_ObjectStreamMap.clear();
1165 m_bXRefStream = true;
1166 return true;
1167 }
1168
LoadLinearizedMainXRefTable()1169 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1170 const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1171 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1172 if (main_xref_offset < 0)
1173 return FORMAT_ERROR;
1174
1175 if (main_xref_offset == 0)
1176 return SUCCESS;
1177
1178 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1179 m_MetadataObjnum = 0;
1180 m_ObjectStreamMap.clear();
1181
1182 if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1183 !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1184 m_LastXRefOffset = 0;
1185 return FORMAT_ERROR;
1186 }
1187
1188 return SUCCESS;
1189 }
1190
SetSyntaxParserForTesting(std::unique_ptr<CPDF_SyntaxParser> parser)1191 void CPDF_Parser::SetSyntaxParserForTesting(
1192 std::unique_ptr<CPDF_SyntaxParser> parser) {
1193 m_pSyntax = std::move(parser);
1194 }
1195
GetTrailerEnds()1196 std::vector<unsigned int> CPDF_Parser::GetTrailerEnds() {
1197 std::vector<unsigned int> trailer_ends;
1198 m_pSyntax->SetTrailerEnds(&trailer_ends);
1199
1200 // Traverse the document.
1201 m_pSyntax->SetPos(0);
1202 while (true) {
1203 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
1204 if (word_result.is_number) {
1205 // The object number was read. Read the generation number.
1206 word_result = m_pSyntax->GetNextWord();
1207 if (!word_result.is_number)
1208 break;
1209
1210 word_result = m_pSyntax->GetNextWord();
1211 if (word_result.word != "obj")
1212 break;
1213
1214 m_pSyntax->GetObjectBody(nullptr);
1215
1216 word_result = m_pSyntax->GetNextWord();
1217 if (word_result.word != "endobj")
1218 break;
1219 } else if (word_result.word == "trailer") {
1220 m_pSyntax->GetObjectBody(nullptr);
1221 } else if (word_result.word == "startxref") {
1222 m_pSyntax->GetNextWord();
1223 } else if (word_result.word == "xref") {
1224 while (true) {
1225 word_result = m_pSyntax->GetNextWord();
1226 if (word_result.word.IsEmpty() || word_result.word == "startxref")
1227 break;
1228 }
1229 m_pSyntax->GetNextWord();
1230 } else {
1231 break;
1232 }
1233 }
1234
1235 // Stop recording trailer ends.
1236 m_pSyntax->SetTrailerEnds(nullptr);
1237 return trailer_ends;
1238 }
1239
WriteToArchive(IFX_ArchiveStream * archive,FX_FILESIZE src_size)1240 bool CPDF_Parser::WriteToArchive(IFX_ArchiveStream* archive,
1241 FX_FILESIZE src_size) {
1242 static constexpr FX_FILESIZE kBufferSize = 4096;
1243 DataVector<uint8_t> buffer(kBufferSize);
1244 m_pSyntax->SetPos(0);
1245 while (src_size) {
1246 const uint32_t block_size =
1247 static_cast<uint32_t>(std::min(kBufferSize, src_size));
1248 auto block_span = pdfium::make_span(buffer).first(block_size);
1249 if (!m_pSyntax->ReadBlock(block_span))
1250 return false;
1251 if (!archive->WriteBlock(pdfium::make_span(buffer).first(block_size)))
1252 return false;
1253 src_size -= block_size;
1254 }
1255 return true;
1256 }
1257