1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
16 #include "core/fpdfapi/parser/cpdf_dictionary.h"
17 #include "core/fpdfapi/parser/cpdf_document.h"
18 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
19 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
24 #include "core/fpdfapi/parser/cpdf_reference.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/cfx_memorystream.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/numerics/safe_conversions.h"
31 #include "third_party/base/ptr_util.h"
32 #include "third_party/base/stl_util.h"
33
34 namespace {
35
36 // static
GetResourceObject(const CPDF_Dictionary * pDict)37 const CPDF_Object* GetResourceObject(const CPDF_Dictionary* pDict) {
38 constexpr size_t kMaxHierarchyDepth = 64;
39 size_t depth = 0;
40
41 const CPDF_Dictionary* dictionary_to_check = pDict;
42 while (dictionary_to_check) {
43 const CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
44 if (result)
45 return result;
46 const CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
47 dictionary_to_check = parent ? parent->GetDict() : nullptr;
48
49 if (++depth > kMaxHierarchyDepth) {
50 // We have cycle in parents hierarchy.
51 return nullptr;
52 }
53 }
54 return nullptr;
55 }
56
57 class HintsScope {
58 public:
HintsScope(CPDF_ReadValidator * validator,CPDF_DataAvail::DownloadHints * hints)59 HintsScope(CPDF_ReadValidator* validator,
60 CPDF_DataAvail::DownloadHints* hints)
61 : validator_(validator) {
62 ASSERT(validator_);
63 validator_->SetDownloadHints(hints);
64 }
65
~HintsScope()66 ~HintsScope() { validator_->SetDownloadHints(nullptr); }
67
68 private:
69 UnownedPtr<CPDF_ReadValidator> validator_;
70 };
71
72 } // namespace
73
~FileAvail()74 CPDF_DataAvail::FileAvail::~FileAvail() {}
75
~DownloadHints()76 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
77
CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78 CPDF_DataAvail::CPDF_DataAvail(
79 FileAvail* pFileAvail,
80 const RetainPtr<IFX_SeekableReadStream>& pFileRead,
81 bool bSupportHintTable)
82 : m_pFileAvail(pFileAvail),
83 m_pFileRead(
84 pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)),
85 m_dwFileLen(m_pFileRead->GetSize()),
86 m_bSupportHintTable(bSupportHintTable) {}
87
~CPDF_DataAvail()88 CPDF_DataAvail::~CPDF_DataAvail() {
89 m_pHintTables.reset();
90 }
91
IsDocAvail(DownloadHints * pHints)92 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
93 DownloadHints* pHints) {
94 if (!m_dwFileLen)
95 return DataError;
96
97 const HintsScope hints_scope(m_pFileRead.Get(), pHints);
98
99 while (!m_bDocAvail) {
100 if (!CheckDocStatus())
101 return DataNotAvailable;
102 }
103
104 return DataAvailable;
105 }
106
CheckDocStatus()107 bool CPDF_DataAvail::CheckDocStatus() {
108 switch (m_docStatus) {
109 case PDF_DATAAVAIL_HEADER:
110 return CheckHeader();
111 case PDF_DATAAVAIL_FIRSTPAGE:
112 return CheckFirstPage();
113 case PDF_DATAAVAIL_HINTTABLE:
114 return CheckHintTables();
115 case PDF_DATAAVAIL_LOADALLCROSSREF:
116 return CheckAndLoadAllXref();
117 case PDF_DATAAVAIL_LOADALLFILE:
118 return LoadAllFile();
119 case PDF_DATAAVAIL_ROOT:
120 return CheckRoot();
121 case PDF_DATAAVAIL_INFO:
122 return CheckInfo();
123 case PDF_DATAAVAIL_PAGETREE:
124 if (m_bTotalLoadPageTree)
125 return CheckPages();
126 return LoadDocPages();
127 case PDF_DATAAVAIL_PAGE:
128 if (m_bTotalLoadPageTree)
129 return CheckPage();
130 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
131 return true;
132 case PDF_DATAAVAIL_ERROR:
133 return LoadAllFile();
134 case PDF_DATAAVAIL_PAGE_LATERLOAD:
135 m_docStatus = PDF_DATAAVAIL_PAGE;
136 default:
137 m_bDocAvail = true;
138 return true;
139 }
140 }
141
CheckPageStatus()142 bool CPDF_DataAvail::CheckPageStatus() {
143 switch (m_docStatus) {
144 case PDF_DATAAVAIL_PAGETREE:
145 return CheckPages();
146 case PDF_DATAAVAIL_PAGE:
147 return CheckPage();
148 case PDF_DATAAVAIL_ERROR:
149 return LoadAllFile();
150 default:
151 m_bPagesTreeLoad = true;
152 m_bPagesLoad = true;
153 return true;
154 }
155 }
156
LoadAllFile()157 bool CPDF_DataAvail::LoadAllFile() {
158 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
159 m_docStatus = PDF_DATAAVAIL_DONE;
160 return true;
161 }
162 return false;
163 }
164
CheckAndLoadAllXref()165 bool CPDF_DataAvail::CheckAndLoadAllXref() {
166 if (!m_pCrossRefAvail) {
167 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
168 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
169 if (GetValidator()->has_read_problems())
170 return false;
171
172 if (last_xref_offset <= 0) {
173 m_docStatus = PDF_DATAAVAIL_ERROR;
174 return false;
175 }
176
177 m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
178 last_xref_offset);
179 }
180
181 switch (m_pCrossRefAvail->CheckAvail()) {
182 case DocAvailStatus::DataAvailable:
183 break;
184 case DocAvailStatus::DataNotAvailable:
185 return false;
186 case DocAvailStatus::DataError:
187 m_docStatus = PDF_DATAAVAIL_ERROR;
188 return false;
189 default:
190 NOTREACHED();
191 return false;
192 }
193
194 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
195 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
196 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
197 return false;
198 }
199
200 m_dwRootObjNum = m_parser.GetRootObjNum();
201 m_dwInfoObjNum = m_parser.GetInfoObjNum();
202 m_pCurrentParser = &m_parser;
203 m_docStatus = PDF_DATAAVAIL_ROOT;
204 return true;
205 }
206
GetObject(uint32_t objnum,bool * pExistInFile)207 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
208 bool* pExistInFile) {
209 CPDF_Parser* pParser = nullptr;
210
211 if (pExistInFile)
212 *pExistInFile = true;
213
214 pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
215
216 std::unique_ptr<CPDF_Object> pRet;
217 if (pParser) {
218 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
219 pRet = pParser->ParseIndirectObject(nullptr, objnum);
220 if (GetValidator()->has_read_problems())
221 return nullptr;
222 }
223
224 if (!pRet && pExistInFile)
225 *pExistInFile = false;
226
227 return pRet;
228 }
229
CheckInfo()230 bool CPDF_DataAvail::CheckInfo() {
231 bool bExist = false;
232 std::unique_ptr<CPDF_Object> pInfo = GetObject(m_dwInfoObjNum, &bExist);
233 if (bExist && !pInfo) {
234 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
235 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
236 return true;
237 }
238 return false;
239 }
240 m_docStatus = PDF_DATAAVAIL_PAGETREE;
241 return true;
242 }
243
CheckRoot()244 bool CPDF_DataAvail::CheckRoot() {
245 bool bExist = false;
246 m_pRoot = GetObject(m_dwRootObjNum, &bExist);
247 if (!bExist) {
248 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
249 return true;
250 }
251
252 if (!m_pRoot) {
253 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
254 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
255 return true;
256 }
257 return false;
258 }
259
260 CPDF_Dictionary* pDict = m_pRoot->GetDict();
261 if (!pDict) {
262 m_docStatus = PDF_DATAAVAIL_ERROR;
263 return false;
264 }
265
266 CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
267 if (!pRef) {
268 m_docStatus = PDF_DATAAVAIL_ERROR;
269 return false;
270 }
271
272 m_PagesObjNum = pRef->GetRefObjNum();
273
274 m_docStatus = m_dwInfoObjNum ? PDF_DATAAVAIL_INFO : PDF_DATAAVAIL_PAGETREE;
275 return true;
276 }
277
PreparePageItem()278 bool CPDF_DataAvail::PreparePageItem() {
279 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
280 CPDF_Reference* pRef =
281 ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
282 if (!pRef) {
283 m_docStatus = PDF_DATAAVAIL_ERROR;
284 return false;
285 }
286
287 m_PagesObjNum = pRef->GetRefObjNum();
288 m_pCurrentParser = m_pDocument->GetParser();
289 m_docStatus = PDF_DATAAVAIL_PAGETREE;
290 return true;
291 }
292
IsFirstCheck(uint32_t dwPage)293 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
294 return m_pageMapCheckState.insert(dwPage).second;
295 }
296
ResetFirstCheck(uint32_t dwPage)297 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
298 m_pageMapCheckState.erase(dwPage);
299 }
300
CheckPage()301 bool CPDF_DataAvail::CheckPage() {
302 std::vector<uint32_t> UnavailObjList;
303 for (uint32_t dwPageObjNum : m_PageObjList) {
304 bool bExists = false;
305 std::unique_ptr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
306 if (!pObj) {
307 if (bExists)
308 UnavailObjList.push_back(dwPageObjNum);
309 continue;
310 }
311 CPDF_Array* pArray = ToArray(pObj.get());
312 if (pArray) {
313 for (const auto& pArrayObj : *pArray) {
314 if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
315 UnavailObjList.push_back(pRef->GetRefObjNum());
316 }
317 }
318 if (!pObj->IsDictionary())
319 continue;
320
321 ByteString type = pObj->GetDict()->GetStringFor("Type");
322 if (type == "Pages") {
323 m_PagesArray.push_back(std::move(pObj));
324 continue;
325 }
326 }
327 m_PageObjList.clear();
328 if (!UnavailObjList.empty()) {
329 m_PageObjList = std::move(UnavailObjList);
330 return false;
331 }
332 size_t iPages = m_PagesArray.size();
333 for (size_t i = 0; i < iPages; ++i) {
334 std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
335 if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
336 m_PagesArray.clear();
337 m_docStatus = PDF_DATAAVAIL_ERROR;
338 return false;
339 }
340 }
341 m_PagesArray.clear();
342 if (m_PageObjList.empty())
343 m_docStatus = PDF_DATAAVAIL_DONE;
344
345 return true;
346 }
347
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)348 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
349 if (!pParser) {
350 m_docStatus = PDF_DATAAVAIL_ERROR;
351 return false;
352 }
353
354 CPDF_Dictionary* pDict = pPages->GetDict();
355 CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
356 if (!pKids)
357 return true;
358
359 switch (pKids->GetType()) {
360 case CPDF_Object::REFERENCE:
361 m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
362 break;
363 case CPDF_Object::ARRAY: {
364 CPDF_Array* pKidsArray = pKids->AsArray();
365 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
366 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
367 m_PageObjList.push_back(pRef->GetRefObjNum());
368 }
369 break;
370 }
371 default:
372 m_docStatus = PDF_DATAAVAIL_ERROR;
373 return false;
374 }
375 return true;
376 }
377
CheckPages()378 bool CPDF_DataAvail::CheckPages() {
379 bool bExists = false;
380 std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
381 if (!bExists) {
382 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
383 return true;
384 }
385
386 if (!pPages) {
387 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
388 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
389 return true;
390 }
391 return false;
392 }
393
394 if (!GetPageKids(m_pCurrentParser, pPages.get())) {
395 m_docStatus = PDF_DATAAVAIL_ERROR;
396 return false;
397 }
398
399 m_docStatus = PDF_DATAAVAIL_PAGE;
400 return true;
401 }
402
CheckHeader()403 bool CPDF_DataAvail::CheckHeader() {
404 switch (CheckHeaderAndLinearized()) {
405 case DocAvailStatus::DataAvailable:
406 m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
407 : PDF_DATAAVAIL_LOADALLCROSSREF;
408 return true;
409 case DocAvailStatus::DataNotAvailable:
410 return false;
411 case DocAvailStatus::DataError:
412 m_docStatus = PDF_DATAAVAIL_ERROR;
413 return true;
414 default:
415 NOTREACHED();
416 return false;
417 }
418 }
419
CheckFirstPage()420 bool CPDF_DataAvail::CheckFirstPage() {
421 if (!m_pLinearized->GetFirstPageEndOffset() ||
422 !m_pLinearized->GetFileSize() ||
423 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
424 m_docStatus = PDF_DATAAVAIL_ERROR;
425 return false;
426 }
427
428 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
429 dwEnd += 512;
430 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
431 dwEnd = (uint32_t)m_dwFileLen;
432
433 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
434 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
435 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
436 data_size))
437 return false;
438
439 m_docStatus =
440 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
441 return true;
442 }
443
CheckHintTables()444 bool CPDF_DataAvail::CheckHintTables() {
445 if (m_pLinearized->GetPageCount() <= 1) {
446 m_docStatus = PDF_DATAAVAIL_DONE;
447 return true;
448 }
449 if (!m_pLinearized->HasHintTable()) {
450 m_docStatus = PDF_DATAAVAIL_ERROR;
451 return false;
452 }
453
454 const FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
455 const uint32_t szHintLength = m_pLinearized->GetHintLength();
456
457 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(szHintStart,
458 szHintLength))
459 return false;
460
461 auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(),
462 m_pLinearized.get());
463 std::unique_ptr<CPDF_Object> pHintStream =
464 ParseIndirectObjectAt(szHintStart, 0);
465 CPDF_Stream* pStream = ToStream(pHintStream.get());
466 if (pStream && pHintTables->LoadHintStream(pStream))
467 m_pHintTables = std::move(pHintTables);
468
469 m_docStatus = PDF_DATAAVAIL_DONE;
470 return true;
471 }
472
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)473 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
474 FX_FILESIZE pos,
475 uint32_t objnum,
476 CPDF_IndirectObjectHolder* pObjList) {
477 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
478 GetSyntaxParser()->SetPos(pos);
479 std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
480 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
481 GetSyntaxParser()->SetPos(SavedPos);
482 return (result && (!objnum || result->GetObjNum() == objnum))
483 ? std::move(result)
484 : nullptr;
485 }
486
IsLinearizedPDF()487 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
488 switch (CheckHeaderAndLinearized()) {
489 case DocAvailStatus::DataAvailable:
490 return m_pLinearized ? DocLinearizationStatus::Linearized
491 : DocLinearizationStatus::NotLinearized;
492 case DocAvailStatus::DataNotAvailable:
493 return DocLinearizationStatus::LinearizationUnknown;
494 case DocAvailStatus::DataError:
495 return DocLinearizationStatus::NotLinearized;
496 default:
497 NOTREACHED();
498 return DocLinearizationStatus::LinearizationUnknown;
499 }
500 }
501
CheckHeaderAndLinearized()502 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
503 if (m_bHeaderAvail)
504 return DocAvailStatus::DataAvailable;
505
506 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
507 const int32_t header_offset = GetHeaderOffset(GetValidator());
508 if (GetValidator()->has_read_problems())
509 return DocAvailStatus::DataNotAvailable;
510
511 if (header_offset == kInvalidHeaderOffset)
512 return DocAvailStatus::DataError;
513
514 m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset);
515 m_pLinearized = m_parser.ParseLinearizedHeader();
516 if (GetValidator()->has_read_problems())
517 return DocAvailStatus::DataNotAvailable;
518
519 m_bHeaderAvail = true;
520 return DocAvailStatus::DataAvailable;
521 }
522
CheckPage(uint32_t dwPage)523 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
524 while (true) {
525 switch (m_docStatus) {
526 case PDF_DATAAVAIL_PAGETREE:
527 if (!LoadDocPages())
528 return false;
529 break;
530 case PDF_DATAAVAIL_PAGE:
531 if (!LoadDocPage(dwPage))
532 return false;
533 break;
534 case PDF_DATAAVAIL_ERROR:
535 return LoadAllFile();
536 default:
537 m_bPagesTreeLoad = true;
538 m_bPagesLoad = true;
539 m_bCurPageDictLoadOK = true;
540 m_docStatus = PDF_DATAAVAIL_PAGE;
541 return true;
542 }
543 }
544 }
545
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)546 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
547 PageNode* pPageNode) {
548 bool bExists = false;
549 std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
550 if (!bExists) {
551 m_docStatus = PDF_DATAAVAIL_ERROR;
552 return false;
553 }
554
555 if (!pPages)
556 return false;
557
558 CPDF_Array* pArray = pPages->AsArray();
559 if (!pArray) {
560 m_docStatus = PDF_DATAAVAIL_ERROR;
561 return false;
562 }
563
564 pPageNode->m_type = PDF_PAGENODE_PAGES;
565 for (size_t i = 0; i < pArray->GetCount(); ++i) {
566 CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
567 if (!pKid)
568 continue;
569
570 auto pNode = pdfium::MakeUnique<PageNode>();
571 pNode->m_dwPageNo = pKid->GetRefObjNum();
572 pPageNode->m_ChildNodes.push_back(std::move(pNode));
573 }
574 return true;
575 }
576
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)577 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
578 PageNode* pPageNode) {
579 bool bExists = false;
580 std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
581 if (!bExists) {
582 m_docStatus = PDF_DATAAVAIL_ERROR;
583 return false;
584 }
585
586 if (!pPage)
587 return false;
588
589 if (pPage->IsArray()) {
590 pPageNode->m_dwPageNo = dwPageNo;
591 pPageNode->m_type = PDF_PAGENODE_ARRAY;
592 return true;
593 }
594
595 if (!pPage->IsDictionary()) {
596 m_docStatus = PDF_DATAAVAIL_ERROR;
597 return false;
598 }
599
600 pPageNode->m_dwPageNo = dwPageNo;
601 CPDF_Dictionary* pDict = pPage->GetDict();
602 const ByteString type = pDict->GetStringFor("Type");
603 if (type == "Page") {
604 pPageNode->m_type = PDF_PAGENODE_PAGE;
605 return true;
606 }
607
608 if (type != "Pages") {
609 m_docStatus = PDF_DATAAVAIL_ERROR;
610 return false;
611 }
612
613 pPageNode->m_type = PDF_PAGENODE_PAGES;
614 CPDF_Object* pKids = pDict->GetObjectFor("Kids");
615 if (!pKids) {
616 m_docStatus = PDF_DATAAVAIL_PAGE;
617 return true;
618 }
619
620 switch (pKids->GetType()) {
621 case CPDF_Object::REFERENCE: {
622 CPDF_Reference* pKid = pKids->AsReference();
623 auto pNode = pdfium::MakeUnique<PageNode>();
624 pNode->m_dwPageNo = pKid->GetRefObjNum();
625 pPageNode->m_ChildNodes.push_back(std::move(pNode));
626 break;
627 }
628 case CPDF_Object::ARRAY: {
629 CPDF_Array* pKidsArray = pKids->AsArray();
630 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
631 CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
632 if (!pKid)
633 continue;
634
635 auto pNode = pdfium::MakeUnique<PageNode>();
636 pNode->m_dwPageNo = pKid->GetRefObjNum();
637 pPageNode->m_ChildNodes.push_back(std::move(pNode));
638 }
639 break;
640 }
641 default:
642 break;
643 }
644 return true;
645 }
646
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)647 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
648 int32_t iPage,
649 int32_t& iCount,
650 int level) {
651 if (level >= kMaxPageRecursionDepth)
652 return false;
653
654 int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
655 if (iSize <= 0 || iPage >= iSize) {
656 m_docStatus = PDF_DATAAVAIL_ERROR;
657 return false;
658 }
659 for (int32_t i = 0; i < iSize; ++i) {
660 PageNode* pNode = pageNode.m_ChildNodes[i].get();
661 if (!pNode)
662 continue;
663
664 if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
665 // Updates the type for the unknown page node.
666 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
667 return false;
668 }
669 if (pNode->m_type == PDF_PAGENODE_ARRAY) {
670 // Updates a more specific type for the array page node.
671 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
672 return false;
673 }
674 switch (pNode->m_type) {
675 case PDF_PAGENODE_PAGE:
676 iCount++;
677 if (iPage == iCount && m_pDocument)
678 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
679 break;
680 case PDF_PAGENODE_PAGES:
681 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
682 return false;
683 break;
684 case PDF_PAGENODE_UNKNOWN:
685 case PDF_PAGENODE_ARRAY:
686 // Already converted above, error if we get here.
687 return false;
688 }
689 if (iPage == iCount) {
690 m_docStatus = PDF_DATAAVAIL_DONE;
691 return true;
692 }
693 }
694 return true;
695 }
696
LoadDocPage(uint32_t dwPage)697 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
698 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
699 int32_t iPage = safePage.ValueOrDie();
700 if (m_pDocument->GetPageCount() <= iPage ||
701 m_pDocument->IsPageLoaded(iPage)) {
702 m_docStatus = PDF_DATAAVAIL_DONE;
703 return true;
704 }
705 if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
706 m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
707 return true;
708 }
709 int32_t iCount = -1;
710 return CheckPageNode(m_PageNode, iPage, iCount, 0);
711 }
712
CheckPageCount()713 bool CPDF_DataAvail::CheckPageCount() {
714 bool bExists = false;
715 std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
716 if (!bExists) {
717 m_docStatus = PDF_DATAAVAIL_ERROR;
718 return false;
719 }
720 if (!pPages)
721 return false;
722
723 CPDF_Dictionary* pPagesDict = pPages->GetDict();
724 if (!pPagesDict) {
725 m_docStatus = PDF_DATAAVAIL_ERROR;
726 return false;
727 }
728 if (!pPagesDict->KeyExist("Kids"))
729 return true;
730
731 return pPagesDict->GetIntegerFor("Count") > 0;
732 }
733
LoadDocPages()734 bool CPDF_DataAvail::LoadDocPages() {
735 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
736 return false;
737
738 if (CheckPageCount()) {
739 m_docStatus = PDF_DATAAVAIL_PAGE;
740 return true;
741 }
742
743 m_bTotalLoadPageTree = true;
744 return false;
745 }
746
LoadPages()747 bool CPDF_DataAvail::LoadPages() {
748 while (!m_bPagesTreeLoad) {
749 if (!CheckPageStatus())
750 return false;
751 }
752
753 if (m_bPagesLoad)
754 return true;
755
756 m_pDocument->LoadPages();
757 return false;
758 }
759
CheckLinearizedData()760 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
761 if (m_bLinearedDataOK)
762 return DataAvailable;
763 ASSERT(m_pLinearized);
764 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
765 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
766 return DataError;
767 }
768
769 if (!m_bMainXRefLoadTried) {
770 const FX_SAFE_FILESIZE main_xref_offset =
771 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
772 if (!main_xref_offset.IsValid())
773 return DataError;
774
775 if (main_xref_offset.ValueOrDie() == 0)
776 return DataAvailable;
777
778 FX_SAFE_SIZE_T data_size = m_dwFileLen;
779 data_size -= main_xref_offset.ValueOrDie();
780 if (!data_size.IsValid())
781 return DataError;
782
783 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
784 main_xref_offset.ValueOrDie(), data_size.ValueOrDie()))
785 return DataNotAvailable;
786
787 CPDF_Parser::Error eRet =
788 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
789 m_bMainXRefLoadTried = true;
790 if (eRet != CPDF_Parser::SUCCESS)
791 return DataError;
792
793 if (!PreparePageItem())
794 return DataNotAvailable;
795
796 m_bMainXRefLoadedOK = true;
797 m_bLinearedDataOK = true;
798 }
799
800 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
801 }
802
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)803 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
804 uint32_t dwPage,
805 DownloadHints* pHints) {
806 if (!m_pDocument)
807 return DataError;
808
809 const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
810 if (!safePage.IsValid())
811 return DataError;
812
813 if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
814 // This is XFA page.
815 return DataAvailable;
816 }
817
818 if (IsFirstCheck(dwPage)) {
819 m_bCurPageDictLoadOK = false;
820 }
821
822 if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
823 return DataAvailable;
824
825 const HintsScope hints_scope(GetValidator().Get(), pHints);
826
827 if (m_pLinearized) {
828 if (dwPage == m_pLinearized->GetFirstPageNo()) {
829 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
830 if (!pPageDict)
831 return DataError;
832
833 auto page_num_obj = std::make_pair(
834 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
835 GetValidator().Get(), m_pDocument, pPageDict));
836
837 CPDF_PageObjectAvail* page_obj_avail =
838 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
839 // TODO(art-snake): Check resources.
840 return page_obj_avail->CheckAvail();
841 }
842
843 DocAvailStatus nResult = CheckLinearizedData();
844 if (nResult != DataAvailable)
845 return nResult;
846
847 if (m_pHintTables) {
848 nResult = m_pHintTables->CheckPage(dwPage);
849 if (nResult != DataAvailable)
850 return nResult;
851 if (GetPage(dwPage)) {
852 m_pagesLoadState.insert(dwPage);
853 return DataAvailable;
854 }
855 }
856
857 if (!m_bMainXRefLoadedOK) {
858 if (!LoadAllFile())
859 return DataNotAvailable;
860 m_pDocument->GetParser()->RebuildCrossRef();
861 ResetFirstCheck(dwPage);
862 return DataAvailable;
863 }
864 if (m_bTotalLoadPageTree) {
865 if (!LoadPages())
866 return DataNotAvailable;
867 } else {
868 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
869 return DataNotAvailable;
870 }
871 } else {
872 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
873 return DataNotAvailable;
874 }
875 }
876
877 if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
878 return DataNotAvailable;
879
880 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
881 if (!pPageDict)
882 return DataError;
883
884 {
885 auto page_num_obj = std::make_pair(
886 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
887 GetValidator().Get(), m_pDocument, pPageDict));
888 CPDF_PageObjectAvail* page_obj_avail =
889 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
890 const DocAvailStatus status = page_obj_avail->CheckAvail();
891 if (status != DocAvailStatus::DataAvailable)
892 return status;
893 }
894
895 const DocAvailStatus resources_status = CheckResources(pPageDict);
896 if (resources_status != DocAvailStatus::DataAvailable)
897 return resources_status;
898
899 m_bCurPageDictLoadOK = false;
900 ResetFirstCheck(dwPage);
901 m_pagesLoadState.insert(dwPage);
902 return DataAvailable;
903 }
904
CheckResources(const CPDF_Dictionary * page)905 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
906 const CPDF_Dictionary* page) {
907 ASSERT(page);
908 const CPDF_ReadValidator::Session read_session(GetValidator().Get());
909 const CPDF_Object* resources = GetResourceObject(page);
910 if (GetValidator()->has_read_problems())
911 return DocAvailStatus::DataNotAvailable;
912
913 if (!resources)
914 return DocAvailStatus::DataAvailable;
915
916 CPDF_PageObjectAvail* resource_avail =
917 m_PagesResourcesAvail
918 .insert(std::make_pair(
919 resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
920 GetValidator().Get(), m_pDocument, resources)))
921 .first->second.get();
922 return resource_avail->CheckAvail();
923 }
924
GetFileRead() const925 RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
926 return m_pFileRead;
927 }
928
GetValidator() const929 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
930 return m_pFileRead;
931 }
932
GetSyntaxParser() const933 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
934 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
935 : m_parser.m_pSyntax.get();
936 }
937
GetPageCount() const938 int CPDF_DataAvail::GetPageCount() const {
939 if (m_pLinearized)
940 return m_pLinearized->GetPageCount();
941 return m_pDocument ? m_pDocument->GetPageCount() : 0;
942 }
943
GetPage(int index)944 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
945 if (!m_pDocument || index < 0 || index >= GetPageCount())
946 return nullptr;
947 CPDF_Dictionary* page = m_pDocument->GetPage(index);
948 if (page)
949 return page;
950 if (!m_pLinearized || !m_pHintTables)
951 return nullptr;
952
953 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
954 return nullptr;
955 FX_FILESIZE szPageStartPos = 0;
956 FX_FILESIZE szPageLength = 0;
957 uint32_t dwObjNum = 0;
958 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
959 &szPageLength, &dwObjNum);
960 if (!bPagePosGot || !dwObjNum)
961 return nullptr;
962 // We should say to the document, which object is the page.
963 m_pDocument->SetPageObjNum(index, dwObjNum);
964 // Page object already can be parsed in document.
965 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
966 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
967 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
968 }
969 if (!ValidatePage(index))
970 return nullptr;
971 return m_pDocument->GetPage(index);
972 }
973
IsFormAvail(DownloadHints * pHints)974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
975 DownloadHints* pHints) {
976 const HintsScope hints_scope(GetValidator().Get(), pHints);
977 return CheckAcroForm();
978 }
979
CheckAcroForm()980 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
981 if (!m_pDocument)
982 return FormAvailable;
983
984 if (m_pLinearized) {
985 DocAvailStatus nDocStatus = CheckLinearizedData();
986 if (nDocStatus == DataError)
987 return FormError;
988 if (nDocStatus == DataNotAvailable)
989 return FormNotAvailable;
990 }
991
992 if (!m_pFormAvail) {
993 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
994 if (!pRoot)
995 return FormAvailable;
996
997 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
998 if (!pAcroForm)
999 return FormNotExist;
1000
1001 m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
1002 GetValidator().Get(), m_pDocument, pAcroForm);
1003 }
1004 switch (m_pFormAvail->CheckAvail()) {
1005 case DocAvailStatus::DataError:
1006 return DocFormStatus::FormError;
1007 case DocAvailStatus::DataNotAvailable:
1008 return DocFormStatus::FormNotAvailable;
1009 case DocAvailStatus::DataAvailable:
1010 return DocFormStatus::FormAvailable;
1011 default:
1012 NOTREACHED();
1013 }
1014 return DocFormStatus::FormError;
1015 }
1016
ValidatePage(uint32_t dwPage)1017 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1018 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1019 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1020 if (!pPageDict)
1021 return false;
1022 CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict);
1023 return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
1024 }
1025
1026 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(const char * password)1027 CPDF_DataAvail::ParseDocument(const char* password) {
1028 if (m_pDocument) {
1029 // We already returned parsed document.
1030 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1031 }
1032 auto parser = pdfium::MakeUnique<CPDF_Parser>();
1033 parser->SetPassword(password);
1034 auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser));
1035
1036 CPDF_ReadValidator::Session read_session(GetValidator().Get());
1037 CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse(
1038 GetFileRead(), document.get());
1039
1040 // Additional check, that all ok.
1041 if (GetValidator()->has_read_problems()) {
1042 NOTREACHED();
1043 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1044 }
1045
1046 if (error != CPDF_Parser::SUCCESS)
1047 return std::make_pair(error, nullptr);
1048
1049 m_pDocument = document.get();
1050 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1051 }
1052
PageNode()1053 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1054
~PageNode()1055 CPDF_DataAvail::PageNode::~PageNode() {}
1056