• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8 
9 #include <ctype.h>
10 #include <stdint.h>
11 
12 #include <algorithm>
13 #include <utility>
14 #include <vector>
15 
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_object_stream.h"
23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
24 #include "core/fpdfapi/parser/cpdf_reference.h"
25 #include "core/fpdfapi/parser/cpdf_security_handler.h"
26 #include "core/fpdfapi/parser/cpdf_stream.h"
27 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
28 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
29 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
30 #include "core/fxcrt/autorestorer.h"
31 #include "core/fxcrt/data_vector.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "core/fxcrt/fx_safe_types.h"
34 #include "core/fxcrt/scoped_set_insertion.h"
35 #include "third_party/base/check.h"
36 #include "third_party/base/check_op.h"
37 #include "third_party/base/containers/contains.h"
38 #include "third_party/base/notreached.h"
39 #include "third_party/base/span.h"
40 
41 namespace {
42 
43 // A limit on the size of the xref table. Theoretical limits are higher, but
44 // this may be large enough in practice.
45 const int32_t kMaxXRefSize = 1048576;
46 
47 // "%PDF-1.7\n"
48 constexpr FX_FILESIZE kPDFHeaderSize = 9;
49 
50 // The required number of fields in a /W array in a cross-reference stream
51 // dictionary.
52 constexpr size_t kMinFieldCount = 3;
53 
54 // V4 trailers are inline.
55 constexpr uint32_t kNoV4TrailerObjectNumber = 0;
56 
// One (start, count) pair, as read from the /Index array of a cross-reference
// stream dictionary.
struct CrossRefV5IndexEntry {
  // Object number of the first entry in the subsection.
  uint32_t start_obj_num;
  // Number of consecutive entries in the subsection.
  uint32_t obj_count;
};
61 
GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type)62 CPDF_Parser::ObjectType GetObjectTypeFromCrossRefStreamType(
63     uint32_t cross_ref_stream_type) {
64   switch (cross_ref_stream_type) {
65     case 0:
66       return CPDF_Parser::ObjectType::kFree;
67     case 1:
68       return CPDF_Parser::ObjectType::kNotCompressed;
69     case 2:
70       return CPDF_Parser::ObjectType::kCompressed;
71     default:
72       return CPDF_Parser::ObjectType::kNull;
73   }
74 }
75 
76 // Use the Get*XRefStreamEntry() functions below, instead of calling this
77 // directly.
GetVarInt(pdfium::span<const uint8_t> input)78 uint32_t GetVarInt(pdfium::span<const uint8_t> input) {
79   uint32_t result = 0;
80   for (uint8_t c : input)
81     result = result * 256 + c;
82   return result;
83 }
84 
85 // The following 3 functions retrieve variable length entries from
86 // cross-reference streams, as described in ISO 32000-1:2008 table 18. There are
87 // only 3 fields for any given entry.
GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)88 uint32_t GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
89                                  pdfium::span<const uint32_t> field_widths) {
90   return GetVarInt(entry_span.first(field_widths[0]));
91 }
92 
GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)93 uint32_t GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
94                                   pdfium::span<const uint32_t> field_widths) {
95   return GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
96 }
97 
GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)98 uint32_t GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
99                                  pdfium::span<const uint32_t> field_widths) {
100   return GetVarInt(
101       entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
102 }
103 
GetCrossRefV5Indices(const CPDF_Array * array,uint32_t size)104 std::vector<CrossRefV5IndexEntry> GetCrossRefV5Indices(const CPDF_Array* array,
105                                                        uint32_t size) {
106   std::vector<CrossRefV5IndexEntry> indices;
107   if (array) {
108     for (size_t i = 0; i < array->size() / 2; i++) {
109       RetainPtr<const CPDF_Number> pStartNumObj = array->GetNumberAt(i * 2);
110       if (!pStartNumObj)
111         continue;
112 
113       RetainPtr<const CPDF_Number> pCountObj = array->GetNumberAt(i * 2 + 1);
114       if (!pCountObj)
115         continue;
116 
117       int nStartNum = pStartNumObj->GetInteger();
118       int nCount = pCountObj->GetInteger();
119       if (nStartNum < 0 || nCount <= 0)
120         continue;
121 
122       indices.push_back(
123           {static_cast<uint32_t>(nStartNum), static_cast<uint32_t>(nCount)});
124     }
125   }
126 
127   if (indices.empty())
128     indices.push_back({0, size});
129   return indices;
130 }
131 
GetFieldWidths(const CPDF_Array * array)132 std::vector<uint32_t> GetFieldWidths(const CPDF_Array* array) {
133   std::vector<uint32_t> results;
134   if (!array)
135     return results;
136 
137   CPDF_ArrayLocker locker(array);
138   for (const auto& obj : locker)
139     results.push_back(obj->GetInteger());
140   return results;
141 }
142 
143 class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
144  public:
145   ObjectsHolderStub() = default;
146   ~ObjectsHolderStub() override = default;
TryInit()147   bool TryInit() override { return true; }
148 };
149 
150 }  // namespace
151 
CPDF_Parser(ParsedObjectsHolder * holder)152 CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder)
153     : m_pObjectsHolder(holder),
154       m_CrossRefTable(std::make_unique<CPDF_CrossRefTable>()) {
155   if (!holder) {
156     m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
157     m_pObjectsHolder = m_pOwnedObjectsHolder.get();
158   }
159 }
160 
CPDF_Parser()161 CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {}
162 
163 CPDF_Parser::~CPDF_Parser() = default;
164 
GetLastObjNum() const165 uint32_t CPDF_Parser::GetLastObjNum() const {
166   return m_CrossRefTable->objects_info().empty()
167              ? 0
168              : m_CrossRefTable->objects_info().rbegin()->first;
169 }
170 
IsValidObjectNumber(uint32_t objnum) const171 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
172   return objnum <= GetLastObjNum();
173 }
174 
GetObjectPositionOrZero(uint32_t objnum) const175 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
176   const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
177   return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
178 }
179 
GetObjectType(uint32_t objnum) const180 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
181   DCHECK(IsValidObjectNumber(objnum));
182   const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
183   return info ? info->type : ObjectType::kFree;
184 }
185 
IsObjectFreeOrNull(uint32_t objnum) const186 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
187   switch (GetObjectType(objnum)) {
188     case ObjectType::kFree:
189     case ObjectType::kNull:
190       return true;
191     case ObjectType::kNotCompressed:
192     case ObjectType::kCompressed:
193       return false;
194   }
195   NOTREACHED();
196   return false;
197 }
198 
IsObjectFree(uint32_t objnum) const199 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
200   return GetObjectType(objnum) == ObjectType::kFree;
201 }
202 
ShrinkObjectMap(uint32_t size)203 void CPDF_Parser::ShrinkObjectMap(uint32_t size) {
204   m_CrossRefTable->ShrinkObjectMap(size);
205 }
206 
InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator)207 bool CPDF_Parser::InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator) {
208   const absl::optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
209   if (!header_offset.has_value())
210     return false;
211   if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
212     return false;
213 
214   m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
215                                                   header_offset.value());
216   return ParseFileVersion();
217 }
218 
ParseFileVersion()219 bool CPDF_Parser::ParseFileVersion() {
220   m_FileVersion = 0;
221   uint8_t ch;
222   if (!m_pSyntax->GetCharAt(5, ch))
223     return false;
224 
225   if (isdigit(ch))
226     m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
227 
228   if (!m_pSyntax->GetCharAt(7, ch))
229     return false;
230 
231   if (isdigit(ch))
232     m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
233   return true;
234 }
235 
StartParse(RetainPtr<IFX_SeekableReadStream> pFileAccess,const ByteString & password)236 CPDF_Parser::Error CPDF_Parser::StartParse(
237     RetainPtr<IFX_SeekableReadStream> pFileAccess,
238     const ByteString& password) {
239   if (!InitSyntaxParser(pdfium::MakeRetain<CPDF_ReadValidator>(
240           std::move(pFileAccess), nullptr)))
241     return FORMAT_ERROR;
242   SetPassword(password);
243   return StartParseInternal();
244 }
245 
StartParseInternal()246 CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
247   DCHECK(!m_bHasParsed);
248   DCHECK(!m_bXRefTableRebuilt);
249   m_bHasParsed = true;
250   m_bXRefStream = false;
251 
252   m_LastXRefOffset = ParseStartXRef();
253   if (m_LastXRefOffset >= kPDFHeaderSize) {
254     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
255         !LoadAllCrossRefV5(m_LastXRefOffset)) {
256       if (!RebuildCrossRef())
257         return FORMAT_ERROR;
258 
259       m_bXRefTableRebuilt = true;
260       m_LastXRefOffset = 0;
261     }
262   } else {
263     if (!RebuildCrossRef())
264       return FORMAT_ERROR;
265 
266     m_bXRefTableRebuilt = true;
267   }
268   Error eRet = SetEncryptHandler();
269   if (eRet != SUCCESS)
270     return eRet;
271 
272   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
273     if (m_bXRefTableRebuilt)
274       return FORMAT_ERROR;
275 
276     ReleaseEncryptHandler();
277     if (!RebuildCrossRef())
278       return FORMAT_ERROR;
279 
280     eRet = SetEncryptHandler();
281     if (eRet != SUCCESS)
282       return eRet;
283 
284     m_pObjectsHolder->TryInit();
285     if (!GetRoot())
286       return FORMAT_ERROR;
287   }
288   if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
289     ReleaseEncryptHandler();
290     if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
291       return FORMAT_ERROR;
292 
293     eRet = SetEncryptHandler();
294     if (eRet != SUCCESS)
295       return eRet;
296   }
297   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
298     RetainPtr<const CPDF_Reference> pMetadata =
299         ToReference(GetRoot()->GetObjectFor("Metadata"));
300     if (pMetadata)
301       m_MetadataObjnum = pMetadata->GetRefObjNum();
302   }
303   return SUCCESS;
304 }
305 
ParseStartXRef()306 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
307   static constexpr char kStartXRefKeyword[] = "startxref";
308   m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
309   if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
310     return 0;
311 
312   // Skip "startxref" keyword.
313   m_pSyntax->GetKeyword();
314 
315   // Read XRef offset.
316   const CPDF_SyntaxParser::WordResult xref_offset_result =
317       m_pSyntax->GetNextWord();
318   if (!xref_offset_result.is_number || xref_offset_result.word.IsEmpty())
319     return 0;
320 
321   const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
322   if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
323     return 0;
324 
325   return result.ValueOrDie();
326 }
327 
SetEncryptHandler()328 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
329   ReleaseEncryptHandler();
330   if (!GetTrailer())
331     return FORMAT_ERROR;
332 
333   RetainPtr<const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
334   if (!pEncryptDict)
335     return SUCCESS;
336 
337   if (pEncryptDict->GetNameFor("Filter") != "Standard")
338     return HANDLER_ERROR;
339 
340   auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
341   if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
342     return PASSWORD_ERROR;
343 
344   m_pSecurityHandler = std::move(pSecurityHandler);
345   return SUCCESS;
346 }
347 
ReleaseEncryptHandler()348 void CPDF_Parser::ReleaseEncryptHandler() {
349   m_pSecurityHandler.Reset();
350 }
351 
352 // Ideally, all the cross reference entries should be verified.
353 // In reality, we rarely see well-formed cross references don't match
354 // with the objects. crbug/602650 showed a case where object numbers
355 // in the cross reference table are all off by one.
VerifyCrossRefV4()356 bool CPDF_Parser::VerifyCrossRefV4() {
357   for (const auto& it : m_CrossRefTable->objects_info()) {
358     if (it.second.pos <= 0)
359       continue;
360     // Find the first non-zero position.
361     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
362     m_pSyntax->SetPos(it.second.pos);
363     CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
364     m_pSyntax->SetPos(SavedPos);
365     if (!word_result.is_number || word_result.word.IsEmpty() ||
366         FXSYS_atoui(word_result.word.c_str()) != it.first) {
367       // If the object number read doesn't match the one stored,
368       // something is wrong with the cross reference table.
369       return false;
370     }
371     break;
372   }
373   return true;
374 }
375 
LoadAllCrossRefV4(FX_FILESIZE xref_offset)376 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
377   if (!LoadCrossRefV4(xref_offset, true))
378     return false;
379 
380   RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
381   if (!trailer)
382     return false;
383 
384   m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
385   const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
386   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
387     ShrinkObjectMap(xrefsize);
388 
389   FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
390   std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
391   std::vector<FX_FILESIZE> xref_list{xref_offset};
392   std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
393 
394   // When the trailer doesn't have Prev entry or Prev entry value is not
395   // numerical, GetDirectInteger() returns 0. Loading will end.
396   xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
397   while (xref_offset > 0) {
398     // Check for circular references.
399     if (pdfium::Contains(seen_xref_offset, xref_offset))
400       return false;
401 
402     seen_xref_offset.insert(xref_offset);
403     xref_list.insert(xref_list.begin(), xref_offset);
404 
405     // SLOW ...
406     LoadCrossRefV4(xref_offset, true);
407 
408     RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
409     if (!pDict)
410       return false;
411 
412     xref_offset = pDict->GetDirectIntegerFor("Prev");
413     xref_stm = pDict->GetIntegerFor("XRefStm");
414     xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
415 
416     // SLOW ...
417     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
418         std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
419                                              kNoV4TrailerObjectNumber),
420         std::move(m_CrossRefTable));
421   }
422 
423   for (size_t i = 0; i < xref_list.size(); ++i) {
424     if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
425       return false;
426 
427     if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
428       return false;
429 
430     if (i == 0 && !VerifyCrossRefV4())
431       return false;
432   }
433   return true;
434 }
435 
LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset)436 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
437   if (!LoadCrossRefV4(main_xref_offset, false))
438     return false;
439 
440   RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
441   if (!main_trailer)
442     return false;
443 
444   // GetTrailer() currently returns the first-page trailer.
445   if (GetTrailer()->GetDirectIntegerFor("Size") == 0)
446     return false;
447 
448   // Read /XRefStm from the first-page trailer. No need to read /Prev for the
449   // first-page trailer, as the caller already did that and passed it in as
450   // |main_xref_offset|.
451   FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
452   std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
453   std::vector<FX_FILESIZE> xref_list{main_xref_offset};
454   std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
455 
456   // Merge the trailers.
457   m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
458       std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
459                                            kNoV4TrailerObjectNumber),
460       std::move(m_CrossRefTable));
461 
462   // Now GetTrailer() returns the merged trailer, where /Prev is from the
463   // main-trailer.
464   FX_FILESIZE xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
465   while (xref_offset > 0) {
466     // Check for circular references.
467     if (pdfium::Contains(seen_xref_offset, xref_offset))
468       return false;
469 
470     seen_xref_offset.insert(xref_offset);
471     xref_list.insert(xref_list.begin(), xref_offset);
472 
473     // SLOW ...
474     LoadCrossRefV4(xref_offset, true);
475 
476     RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
477     if (!pDict)
478       return false;
479 
480     xref_offset = pDict->GetDirectIntegerFor("Prev");
481     xref_stm = pDict->GetIntegerFor("XRefStm");
482     xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
483 
484     // SLOW ...
485     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
486         std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
487                                              kNoV4TrailerObjectNumber),
488         std::move(m_CrossRefTable));
489   }
490 
491   if (xref_stream_list[0] > 0 && !LoadCrossRefV5(&xref_stream_list[0], false))
492     return false;
493 
494   for (size_t i = 1; i < xref_list.size(); ++i) {
495     if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
496       return false;
497 
498     if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
499       return false;
500   }
501   return true;
502 }
503 
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)504 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
505     uint32_t start_objnum,
506     uint32_t count,
507     std::vector<CrossRefObjData>* out_objects) {
508   if (!count)
509     return true;
510 
511   // Each entry shall be exactly 20 byte.
512   // A sample entry looks like:
513   // "0000000000 00007 f\r\n"
514   static constexpr int32_t kEntrySize = 20;
515 
516   if (!out_objects) {
517     FX_SAFE_FILESIZE pos = count;
518     pos *= kEntrySize;
519     pos += m_pSyntax->GetPos();
520     if (!pos.IsValid())
521       return false;
522     m_pSyntax->SetPos(pos.ValueOrDie());
523     return true;
524   }
525   const size_t start_obj_index = out_objects->size();
526   FX_SAFE_SIZE_T new_size = start_obj_index;
527   new_size += count;
528   if (!new_size.IsValid())
529     return false;
530 
531   if (new_size.ValueOrDie() > kMaxXRefSize)
532     return false;
533 
534   const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
535   if (new_size.ValueOrDie() > max_entries_in_file)
536     return false;
537 
538   out_objects->resize(new_size.ValueOrDie());
539 
540   DataVector<char> buf(1024 * kEntrySize + 1);
541   buf.back() = '\0';
542 
543   uint32_t entries_to_read = count;
544   while (entries_to_read > 0) {
545     const uint32_t entries_in_block = std::min(entries_to_read, 1024u);
546     const uint32_t bytes_to_read = entries_in_block * kEntrySize;
547     auto block_span = pdfium::make_span(buf).first(bytes_to_read);
548     if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
549       return false;
550 
551     for (uint32_t i = 0; i < entries_in_block; i++) {
552       uint32_t iObjectIndex = count - entries_to_read + i;
553       CrossRefObjData& obj_data =
554           (*out_objects)[start_obj_index + iObjectIndex];
555       const uint32_t objnum = start_objnum + iObjectIndex;
556       obj_data.obj_num = objnum;
557       ObjectInfo& info = obj_data.info;
558 
559       const char* pEntry = &buf[i * kEntrySize];
560       if (pEntry[17] == 'f') {
561         info.pos = 0;
562         info.type = ObjectType::kFree;
563       } else {
564         const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
565         if (!offset.IsValid())
566           return false;
567 
568         if (offset.ValueOrDie() == 0) {
569           for (int32_t c = 0; c < 10; c++) {
570             if (!isdigit(pEntry[c]))
571               return false;
572           }
573         }
574 
575         info.pos = offset.ValueOrDie();
576 
577         // TODO(art-snake): The info.gennum is uint16_t, but version may be
578         // greated than max<uint16_t>. Needs solve this issue.
579         const int32_t version = FXSYS_atoi(pEntry + 11);
580         info.gennum = version;
581         info.type = ObjectType::kNotCompressed;
582       }
583     }
584     entries_to_read -= entries_in_block;
585   }
586   return true;
587 }
588 
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)589 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
590   if (out_objects)
591     out_objects->clear();
592 
593   if (m_pSyntax->GetKeyword() != "xref")
594     return false;
595   std::vector<CrossRefObjData> result_objects;
596   while (true) {
597     FX_FILESIZE saved_pos = m_pSyntax->GetPos();
598     CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
599     const ByteString& word = word_result.word;
600     if (word.IsEmpty())
601       return false;
602 
603     if (!word_result.is_number) {
604       m_pSyntax->SetPos(saved_pos);
605       break;
606     }
607 
608     uint32_t start_objnum = FXSYS_atoui(word.c_str());
609     if (start_objnum >= kMaxObjectNumber)
610       return false;
611 
612     uint32_t count = m_pSyntax->GetDirectNum();
613     m_pSyntax->ToNextWord();
614 
615     if (!ParseAndAppendCrossRefSubsectionData(
616             start_objnum, count, out_objects ? &result_objects : nullptr)) {
617       return false;
618     }
619   }
620   if (out_objects)
621     *out_objects = std::move(result_objects);
622   return true;
623 }
624 
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)625 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
626   m_pSyntax->SetPos(pos);
627   std::vector<CrossRefObjData> objects;
628   if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
629     return false;
630 
631   MergeCrossRefObjectsData(objects);
632   return true;
633 }
634 
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)635 void CPDF_Parser::MergeCrossRefObjectsData(
636     const std::vector<CrossRefObjData>& objects) {
637   for (const auto& obj : objects) {
638     switch (obj.info.type) {
639       case ObjectType::kFree:
640         if (obj.info.gennum > 0)
641           m_CrossRefTable->SetFree(obj.obj_num);
642         break;
643       case ObjectType::kNormal:
644       case ObjectType::kObjStream:
645         m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
646         break;
647       case ObjectType::kCompressed:
648         m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
649                                        obj.info.archive.obj_index);
650         break;
651       default:
652         NOTREACHED();
653     }
654   }
655 }
656 
LoadAllCrossRefV5(FX_FILESIZE xref_offset)657 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
658   if (!LoadCrossRefV5(&xref_offset, true))
659     return false;
660 
661   std::set<FX_FILESIZE> seen_xref_offset;
662   while (xref_offset > 0) {
663     seen_xref_offset.insert(xref_offset);
664     if (!LoadCrossRefV5(&xref_offset, false))
665       return false;
666 
667     // Check for circular references.
668     if (pdfium::Contains(seen_xref_offset, xref_offset))
669       return false;
670   }
671   m_ObjectStreamMap.clear();
672   m_bXRefStream = true;
673   return true;
674 }
675 
RebuildCrossRef()676 bool CPDF_Parser::RebuildCrossRef() {
677   auto cross_ref_table = std::make_unique<CPDF_CrossRefTable>();
678 
679   const uint32_t kBufferSize = 4096;
680   m_pSyntax->SetReadBufferSize(kBufferSize);
681   m_pSyntax->SetPos(0);
682 
683   std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
684   for (CPDF_SyntaxParser::WordResult result = m_pSyntax->GetNextWord();
685        !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
686     const ByteString& word = result.word;
687     if (result.is_number) {
688       numbers.emplace_back(FXSYS_atoui(word.c_str()),
689                            m_pSyntax->GetPos() - word.GetLength());
690       if (numbers.size() > 2u)
691         numbers.erase(numbers.begin());
692       continue;
693     }
694 
695     if (word == "(") {
696       m_pSyntax->ReadString();
697     } else if (word == "<") {
698       m_pSyntax->ReadHexString();
699     } else if (word == "trailer") {
700       RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
701       if (pTrailer) {
702         CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
703         // Grab the object number from `pTrailer` before potentially calling
704         // std::move(pTrailer) below.
705         const uint32_t trailer_object_number = pTrailer->GetObjNum();
706         RetainPtr<CPDF_Dictionary> trailer_dict =
707             stream_trailer ? stream_trailer->GetMutableDict()
708                            : ToDictionary(std::move(pTrailer));
709         cross_ref_table = CPDF_CrossRefTable::MergeUp(
710             std::move(cross_ref_table),
711             std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
712                                                  trailer_object_number));
713       }
714     } else if (word == "obj" && numbers.size() == 2u) {
715       const FX_FILESIZE obj_pos = numbers[0].second;
716       const uint32_t obj_num = numbers[0].first;
717       const uint32_t gen_num = numbers[1].first;
718 
719       m_pSyntax->SetPos(obj_pos);
720       const RetainPtr<CPDF_Stream> pStream =
721           ToStream(m_pSyntax->GetIndirectObject(
722               nullptr, CPDF_SyntaxParser::ParseType::kStrict));
723 
724       if (pStream && pStream->GetDict()->GetNameFor("Type") == "XRef") {
725         cross_ref_table = CPDF_CrossRefTable::MergeUp(
726             std::move(cross_ref_table),
727             std::make_unique<CPDF_CrossRefTable>(
728                 ToDictionary(pStream->GetDict()->Clone()),
729                 pStream->GetObjNum()));
730       }
731 
732       if (obj_num < kMaxObjectNumber) {
733         cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
734         const auto object_stream =
735             CPDF_ObjectStream::Create(std::move(pStream));
736         if (object_stream) {
737           const auto& object_info = object_stream->object_info();
738           for (size_t i = 0; i < object_info.size(); ++i) {
739             const auto& info = object_info[i];
740             if (info.obj_num < kMaxObjectNumber)
741               cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
742           }
743         }
744       }
745     }
746     numbers.clear();
747   }
748 
749   m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
750                                                 std::move(cross_ref_table));
751   // Resore default buffer size.
752   m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
753 
754   return GetTrailer() && !m_CrossRefTable->objects_info().empty();
755 }
756 
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)757 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
758   RetainPtr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0));
759   if (!pObject || !pObject->GetObjNum())
760     return false;
761 
762   RetainPtr<const CPDF_Stream> pStream(pObject->AsStream());
763   if (!pStream)
764     return false;
765 
766   RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
767   int32_t prev = pDict->GetIntegerFor("Prev");
768   if (prev < 0)
769     return false;
770 
771   int32_t size = pDict->GetIntegerFor("Size");
772   if (size < 0)
773     return false;
774 
775   *pos = prev;
776 
777   RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
778   if (bMainXRef) {
779     m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
780         std::move(pNewTrailer), pStream->GetObjNum());
781     m_CrossRefTable->ShrinkObjectMap(size);
782   } else {
783     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
784         std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
785                                              pStream->GetObjNum()),
786         std::move(m_CrossRefTable));
787   }
788 
789   std::vector<CrossRefV5IndexEntry> indices =
790       GetCrossRefV5Indices(pDict->GetArrayFor("Index").Get(), size);
791 
792   std::vector<uint32_t> field_widths =
793       GetFieldWidths(pDict->GetArrayFor("W").Get());
794   if (field_widths.size() < kMinFieldCount)
795     return false;
796 
797   FX_SAFE_UINT32 dwAccWidth;
798   for (uint32_t width : field_widths)
799     dwAccWidth += width;
800   if (!dwAccWidth.IsValid())
801     return false;
802 
803   uint32_t total_width = dwAccWidth.ValueOrDie();
804   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
805   pAcc->LoadAllDataFiltered();
806 
807   pdfium::span<const uint8_t> data_span = pAcc->GetSpan();
808   uint32_t segindex = 0;
809   for (const auto& index : indices) {
810     FX_SAFE_UINT32 seg_end = segindex;
811     seg_end += index.obj_count;
812     seg_end *= total_width;
813     if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
814       continue;
815 
816     pdfium::span<const uint8_t> seg_span = data_span.subspan(
817         segindex * total_width, index.obj_count * total_width);
818     FX_SAFE_UINT32 dwMaxObjNum = index.start_obj_num;
819     dwMaxObjNum += index.obj_count;
820     uint32_t dwV5Size =
821         m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
822     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
823       continue;
824 
825     for (uint32_t i = 0; i < index.obj_count; ++i) {
826       const uint32_t obj_num = index.start_obj_num + i;
827       if (obj_num >= CPDF_Parser::kMaxObjectNumber)
828         break;
829 
830       ProcessCrossRefV5Entry(seg_span.subspan(i * total_width, total_width),
831                              field_widths, obj_num);
832     }
833 
834     segindex += index.obj_count;
835   }
836   return true;
837 }
838 
ProcessCrossRefV5Entry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths,uint32_t obj_num)839 void CPDF_Parser::ProcessCrossRefV5Entry(
840     pdfium::span<const uint8_t> entry_span,
841     pdfium::span<const uint32_t> field_widths,
842     uint32_t obj_num) {
843   DCHECK_GE(field_widths.size(), kMinFieldCount);
844   ObjectType type = ObjectType::kNotCompressed;
845   if (field_widths[0]) {
846     const uint32_t cross_ref_stream_obj_type =
847         GetFirstXRefStreamEntry(entry_span, field_widths);
848     type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
849     if (type == ObjectType::kNull)
850       return;
851   }
852 
853   const ObjectType existing_type = GetObjectType(obj_num);
854   if (existing_type == ObjectType::kNull) {
855     const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
856     if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
857       m_CrossRefTable->AddNormal(obj_num, 0, offset);
858     return;
859   }
860 
861   if (existing_type != ObjectType::kFree)
862     return;
863 
864   if (type == ObjectType::kFree) {
865     m_CrossRefTable->SetFree(obj_num);
866     return;
867   }
868 
869   if (type == ObjectType::kNotCompressed) {
870     const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
871     if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
872       m_CrossRefTable->AddNormal(obj_num, 0, offset);
873     return;
874   }
875 
876   DCHECK_EQ(type, ObjectType::kCompressed);
877   const uint32_t archive_obj_num =
878       GetSecondXRefStreamEntry(entry_span, field_widths);
879   if (!IsValidObjectNumber(archive_obj_num)) {
880     return;
881   }
882 
883   const uint32_t archive_obj_index =
884       GetThirdXRefStreamEntry(entry_span, field_widths);
885   m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
886 }
887 
GetIDArray() const888 RetainPtr<const CPDF_Array> CPDF_Parser::GetIDArray() const {
889   return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
890 }
891 
GetRoot() const892 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetRoot() const {
893   RetainPtr<CPDF_Object> obj =
894       m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
895   return obj ? obj->GetDict() : nullptr;
896 }
897 
GetEncryptDict() const898 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetEncryptDict() const {
899   if (!GetTrailer())
900     return nullptr;
901 
902   RetainPtr<const CPDF_Object> pEncryptObj =
903       GetTrailer()->GetObjectFor("Encrypt");
904   if (!pEncryptObj)
905     return nullptr;
906 
907   if (pEncryptObj->IsDictionary())
908     return pdfium::WrapRetain(pEncryptObj->AsDictionary());
909 
910   if (pEncryptObj->IsReference()) {
911     return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
912         pEncryptObj->AsReference()->GetRefObjNum()));
913   }
914   return nullptr;
915 }
916 
GetEncodedPassword() const917 ByteString CPDF_Parser::GetEncodedPassword() const {
918   return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
919 }
920 
GetTrailer() const921 const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
922   return m_CrossRefTable->trailer();
923 }
924 
GetMutableTrailerForTesting()925 CPDF_Dictionary* CPDF_Parser::GetMutableTrailerForTesting() {
926   return m_CrossRefTable->GetMutableTrailerForTesting();
927 }
928 
GetTrailerObjectNumber() const929 uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
930   return m_CrossRefTable->trailer_object_number();
931 }
932 
GetCombinedTrailer() const933 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
934   return m_CrossRefTable->trailer()
935              ? ToDictionary(m_CrossRefTable->trailer()->Clone())
936              : RetainPtr<CPDF_Dictionary>();
937 }
938 
GetInfoObjNum() const939 uint32_t CPDF_Parser::GetInfoObjNum() const {
940   RetainPtr<const CPDF_Reference> pRef =
941       ToReference(m_CrossRefTable->trailer()
942                       ? m_CrossRefTable->trailer()->GetObjectFor("Info")
943                       : nullptr);
944   return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
945 }
946 
GetRootObjNum() const947 uint32_t CPDF_Parser::GetRootObjNum() const {
948   RetainPtr<const CPDF_Reference> pRef =
949       ToReference(m_CrossRefTable->trailer()
950                       ? m_CrossRefTable->trailer()->GetObjectFor("Root")
951                       : nullptr);
952   return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
953 }
954 
ParseIndirectObject(uint32_t objnum)955 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObject(uint32_t objnum) {
956   if (!IsValidObjectNumber(objnum))
957     return nullptr;
958 
959   // Prevent circular parsing the same object.
960   if (pdfium::Contains(m_ParsingObjNums, objnum))
961     return nullptr;
962 
963   ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
964   if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
965     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
966     if (pos <= 0)
967       return nullptr;
968     return ParseIndirectObjectAt(pos, objnum);
969   }
970   if (GetObjectType(objnum) != ObjectType::kCompressed)
971     return nullptr;
972 
973   const ObjectInfo& info = *m_CrossRefTable->GetObjectInfo(objnum);
974   const CPDF_ObjectStream* pObjStream = GetObjectStream(info.archive.obj_num);
975   if (!pObjStream)
976     return nullptr;
977 
978   return pObjStream->ParseObject(m_pObjectsHolder, objnum,
979                                  info.archive.obj_index);
980 }
981 
GetObjectStream(uint32_t object_number)982 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
983   // Prevent circular parsing the same object.
984   if (pdfium::Contains(m_ParsingObjNums, object_number))
985     return nullptr;
986 
987   auto it = m_ObjectStreamMap.find(object_number);
988   if (it != m_ObjectStreamMap.end())
989     return it->second.get();
990 
991   const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
992   if (!info || info->type != ObjectType::kObjStream)
993     return nullptr;
994 
995   const FX_FILESIZE object_pos = info->pos;
996   if (object_pos <= 0)
997     return nullptr;
998 
999   // Keep track of `object_number` before doing more parsing.
1000   ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, object_number);
1001 
1002   RetainPtr<CPDF_Object> object =
1003       ParseIndirectObjectAt(object_pos, object_number);
1004   if (!object)
1005     return nullptr;
1006 
1007   std::unique_ptr<CPDF_ObjectStream> objs_stream =
1008       CPDF_ObjectStream::Create(ToStream(object));
1009   const CPDF_ObjectStream* result = objs_stream.get();
1010   m_ObjectStreamMap[object_number] = std::move(objs_stream);
1011 
1012   return result;
1013 }
1014 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum)1015 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
1016                                                           uint32_t objnum) {
1017   const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1018   m_pSyntax->SetPos(pos);
1019 
1020   auto result = m_pSyntax->GetIndirectObject(
1021       m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1022   m_pSyntax->SetPos(saved_pos);
1023   if (result && objnum && result->GetObjNum() != objnum)
1024     return nullptr;
1025 
1026   const bool should_decrypt = m_pSecurityHandler &&
1027                               m_pSecurityHandler->GetCryptoHandler() &&
1028                               objnum != m_MetadataObjnum;
1029   if (should_decrypt &&
1030       !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1031     return nullptr;
1032   }
1033   return result;
1034 }
1035 
GetDocumentSize() const1036 FX_FILESIZE CPDF_Parser::GetDocumentSize() const {
1037   return m_pSyntax->GetDocumentSize();
1038 }
1039 
GetFirstPageNo() const1040 uint32_t CPDF_Parser::GetFirstPageNo() const {
1041   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1042 }
1043 
SetLinearizedHeaderForTesting(std::unique_ptr<CPDF_LinearizedHeader> pLinearized)1044 void CPDF_Parser::SetLinearizedHeaderForTesting(
1045     std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
1046   m_pLinearized = std::move(pLinearized);
1047 }
1048 
LoadTrailerV4()1049 RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1050   if (m_pSyntax->GetKeyword() != "trailer")
1051     return nullptr;
1052 
1053   return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1054 }
1055 
GetPermissions() const1056 uint32_t CPDF_Parser::GetPermissions() const {
1057   return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF;
1058 }
1059 
ParseLinearizedHeader()1060 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
1061   return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1062 }
1063 
StartLinearizedParse(RetainPtr<CPDF_ReadValidator> validator,const ByteString & password)1064 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1065     RetainPtr<CPDF_ReadValidator> validator,
1066     const ByteString& password) {
1067   DCHECK(!m_bHasParsed);
1068   DCHECK(!m_bXRefTableRebuilt);
1069   SetPassword(password);
1070   m_bXRefStream = false;
1071   m_LastXRefOffset = 0;
1072 
1073   if (!InitSyntaxParser(std::move(validator)))
1074     return FORMAT_ERROR;
1075 
1076   m_pLinearized = ParseLinearizedHeader();
1077   if (!m_pLinearized)
1078     return StartParseInternal();
1079 
1080   m_bHasParsed = true;
1081 
1082   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1083   FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1084   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
1085   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1086     if (!RebuildCrossRef())
1087       return FORMAT_ERROR;
1088 
1089     m_bXRefTableRebuilt = true;
1090     m_LastXRefOffset = 0;
1091   }
1092   if (bLoadV4) {
1093     RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
1094     if (!trailer)
1095       return SUCCESS;
1096 
1097     m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
1098     const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
1099     if (xrefsize > 0) {
1100       // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
1101       // the xref table.
1102       const uint32_t expected_last_obj_num = xrefsize - 1;
1103       if (GetLastObjNum() != expected_last_obj_num && !RebuildCrossRef()) {
1104         return FORMAT_ERROR;
1105       }
1106     }
1107   }
1108 
1109   Error eRet = SetEncryptHandler();
1110   if (eRet != SUCCESS)
1111     return eRet;
1112 
1113   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1114     if (m_bXRefTableRebuilt)
1115       return FORMAT_ERROR;
1116 
1117     ReleaseEncryptHandler();
1118     if (!RebuildCrossRef())
1119       return FORMAT_ERROR;
1120 
1121     eRet = SetEncryptHandler();
1122     if (eRet != SUCCESS)
1123       return eRet;
1124 
1125     m_pObjectsHolder->TryInit();
1126     if (!GetRoot())
1127       return FORMAT_ERROR;
1128   }
1129 
1130   if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
1131     ReleaseEncryptHandler();
1132     if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
1133       return FORMAT_ERROR;
1134 
1135     eRet = SetEncryptHandler();
1136     if (eRet != SUCCESS)
1137       return eRet;
1138   }
1139 
1140   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1141     RetainPtr<const CPDF_Reference> pMetadata =
1142         ToReference(GetRoot()->GetObjectFor("Metadata"));
1143     if (pMetadata)
1144       m_MetadataObjnum = pMetadata->GetRefObjNum();
1145   }
1146   return SUCCESS;
1147 }
1148 
LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset)1149 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1150   FX_FILESIZE xref_offset = main_xref_offset;
1151   if (!LoadCrossRefV5(&xref_offset, false))
1152     return false;
1153 
1154   std::set<FX_FILESIZE> seen_xref_offset;
1155   while (xref_offset) {
1156     seen_xref_offset.insert(xref_offset);
1157     if (!LoadCrossRefV5(&xref_offset, false))
1158       return false;
1159 
1160     // Check for circular references.
1161     if (pdfium::Contains(seen_xref_offset, xref_offset))
1162       return false;
1163   }
1164   m_ObjectStreamMap.clear();
1165   m_bXRefStream = true;
1166   return true;
1167 }
1168 
LoadLinearizedMainXRefTable()1169 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1170   const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1171   const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1172   if (main_xref_offset < 0)
1173     return FORMAT_ERROR;
1174 
1175   if (main_xref_offset == 0)
1176     return SUCCESS;
1177 
1178   const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1179   m_MetadataObjnum = 0;
1180   m_ObjectStreamMap.clear();
1181 
1182   if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1183       !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1184     m_LastXRefOffset = 0;
1185     return FORMAT_ERROR;
1186   }
1187 
1188   return SUCCESS;
1189 }
1190 
SetSyntaxParserForTesting(std::unique_ptr<CPDF_SyntaxParser> parser)1191 void CPDF_Parser::SetSyntaxParserForTesting(
1192     std::unique_ptr<CPDF_SyntaxParser> parser) {
1193   m_pSyntax = std::move(parser);
1194 }
1195 
GetTrailerEnds()1196 std::vector<unsigned int> CPDF_Parser::GetTrailerEnds() {
1197   std::vector<unsigned int> trailer_ends;
1198   m_pSyntax->SetTrailerEnds(&trailer_ends);
1199 
1200   // Traverse the document.
1201   m_pSyntax->SetPos(0);
1202   while (true) {
1203     CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
1204     if (word_result.is_number) {
1205       // The object number was read. Read the generation number.
1206       word_result = m_pSyntax->GetNextWord();
1207       if (!word_result.is_number)
1208         break;
1209 
1210       word_result = m_pSyntax->GetNextWord();
1211       if (word_result.word != "obj")
1212         break;
1213 
1214       m_pSyntax->GetObjectBody(nullptr);
1215 
1216       word_result = m_pSyntax->GetNextWord();
1217       if (word_result.word != "endobj")
1218         break;
1219     } else if (word_result.word == "trailer") {
1220       m_pSyntax->GetObjectBody(nullptr);
1221     } else if (word_result.word == "startxref") {
1222       m_pSyntax->GetNextWord();
1223     } else if (word_result.word == "xref") {
1224       while (true) {
1225         word_result = m_pSyntax->GetNextWord();
1226         if (word_result.word.IsEmpty() || word_result.word == "startxref")
1227           break;
1228       }
1229       m_pSyntax->GetNextWord();
1230     } else {
1231       break;
1232     }
1233   }
1234 
1235   // Stop recording trailer ends.
1236   m_pSyntax->SetTrailerEnds(nullptr);
1237   return trailer_ends;
1238 }
1239 
WriteToArchive(IFX_ArchiveStream * archive,FX_FILESIZE src_size)1240 bool CPDF_Parser::WriteToArchive(IFX_ArchiveStream* archive,
1241                                  FX_FILESIZE src_size) {
1242   static constexpr FX_FILESIZE kBufferSize = 4096;
1243   DataVector<uint8_t> buffer(kBufferSize);
1244   m_pSyntax->SetPos(0);
1245   while (src_size) {
1246     const uint32_t block_size =
1247         static_cast<uint32_t>(std::min(kBufferSize, src_size));
1248     auto block_span = pdfium::make_span(buffer).first(block_size);
1249     if (!m_pSyntax->ReadBlock(block_span))
1250       return false;
1251     if (!archive->WriteBlock(pdfium::make_span(buffer).first(block_size)))
1252       return false;
1253     src_size -= block_size;
1254   }
1255   return true;
1256 }
1257