1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_data_avail.h" 8 9 #include <algorithm> 10 #include <memory> 11 #include <utility> 12 13 #include "core/fpdfapi/cpdf_modulemgr.h" 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h" 16 #include "core/fpdfapi/parser/cpdf_dictionary.h" 17 #include "core/fpdfapi/parser/cpdf_document.h" 18 #include "core/fpdfapi/parser/cpdf_hint_tables.h" 19 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 20 #include "core/fpdfapi/parser/cpdf_name.h" 21 #include "core/fpdfapi/parser/cpdf_number.h" 22 #include "core/fpdfapi/parser/cpdf_page_object_avail.h" 23 #include "core/fpdfapi/parser/cpdf_read_validator.h" 24 #include "core/fpdfapi/parser/cpdf_reference.h" 25 #include "core/fpdfapi/parser/cpdf_stream.h" 26 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 27 #include "core/fxcrt/cfx_memorystream.h" 28 #include "core/fxcrt/fx_extension.h" 29 #include "core/fxcrt/fx_safe_types.h" 30 #include "third_party/base/numerics/safe_conversions.h" 31 #include "third_party/base/ptr_util.h" 32 #include "third_party/base/stl_util.h" 33 34 namespace { 35 36 // static GetResourceObject(const CPDF_Dictionary * pDict)37 const CPDF_Object* GetResourceObject(const CPDF_Dictionary* pDict) { 38 constexpr size_t kMaxHierarchyDepth = 64; 39 size_t depth = 0; 40 41 const CPDF_Dictionary* dictionary_to_check = pDict; 42 while (dictionary_to_check) { 43 const CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources"); 44 if (result) 45 return result; 46 const CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent"); 47 dictionary_to_check = parent ? parent->GetDict() : nullptr; 48 49 if (++depth > kMaxHierarchyDepth) { 50 // We have cycle in parents hierarchy. 51 return nullptr; 52 } 53 } 54 return nullptr; 55 } 56 57 class HintsScope { 58 public: HintsScope(CPDF_ReadValidator * validator,CPDF_DataAvail::DownloadHints * hints)59 HintsScope(CPDF_ReadValidator* validator, 60 CPDF_DataAvail::DownloadHints* hints) 61 : validator_(validator) { 62 ASSERT(validator_); 63 validator_->SetDownloadHints(hints); 64 } 65 ~HintsScope()66 ~HintsScope() { validator_->SetDownloadHints(nullptr); } 67 68 private: 69 UnownedPtr<CPDF_ReadValidator> validator_; 70 }; 71 72 } // namespace 73 ~FileAvail()74 CPDF_DataAvail::FileAvail::~FileAvail() {} 75 ~DownloadHints()76 CPDF_DataAvail::DownloadHints::~DownloadHints() {} 77 CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78 CPDF_DataAvail::CPDF_DataAvail( 79 FileAvail* pFileAvail, 80 const RetainPtr<IFX_SeekableReadStream>& pFileRead, 81 bool bSupportHintTable) 82 : m_pFileAvail(pFileAvail), 83 m_pFileRead( 84 pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)), 85 m_dwFileLen(m_pFileRead->GetSize()), 86 m_bSupportHintTable(bSupportHintTable) {} 87 ~CPDF_DataAvail()88 CPDF_DataAvail::~CPDF_DataAvail() { 89 m_pHintTables.reset(); 90 } 91 IsDocAvail(DownloadHints * pHints)92 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( 93 DownloadHints* pHints) { 94 if (!m_dwFileLen) 95 return DataError; 96 97 const HintsScope hints_scope(m_pFileRead.Get(), pHints); 98 99 while (!m_bDocAvail) { 100 if (!CheckDocStatus()) 101 return DataNotAvailable; 102 } 103 104 return DataAvailable; 105 } 106 CheckDocStatus()107 bool CPDF_DataAvail::CheckDocStatus() { 108 switch (m_docStatus) { 109 case PDF_DATAAVAIL_HEADER: 110 return CheckHeader(); 111 case PDF_DATAAVAIL_FIRSTPAGE: 112 return CheckFirstPage(); 113 case PDF_DATAAVAIL_HINTTABLE: 114 return CheckHintTables(); 115 case PDF_DATAAVAIL_LOADALLCROSSREF: 116 return CheckAndLoadAllXref(); 117 case PDF_DATAAVAIL_LOADALLFILE: 118 return LoadAllFile(); 119 case PDF_DATAAVAIL_ROOT: 120 return CheckRoot(); 121 case PDF_DATAAVAIL_INFO: 122 return CheckInfo(); 123 case PDF_DATAAVAIL_PAGETREE: 124 if (m_bTotalLoadPageTree) 125 return CheckPages(); 126 return LoadDocPages(); 127 case PDF_DATAAVAIL_PAGE: 128 if (m_bTotalLoadPageTree) 129 return CheckPage(); 130 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; 131 return true; 132 case PDF_DATAAVAIL_ERROR: 133 return LoadAllFile(); 134 case PDF_DATAAVAIL_PAGE_LATERLOAD: 135 m_docStatus = PDF_DATAAVAIL_PAGE; 136 default: 137 m_bDocAvail = true; 138 return true; 139 } 140 } 141 CheckPageStatus()142 bool CPDF_DataAvail::CheckPageStatus() { 143 switch (m_docStatus) { 144 case PDF_DATAAVAIL_PAGETREE: 145 return CheckPages(); 146 case PDF_DATAAVAIL_PAGE: 147 return CheckPage(); 148 case PDF_DATAAVAIL_ERROR: 149 return LoadAllFile(); 150 default: 151 m_bPagesTreeLoad = true; 152 m_bPagesLoad = true; 153 return true; 154 } 155 } 156 LoadAllFile()157 bool CPDF_DataAvail::LoadAllFile() { 158 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) { 159 m_docStatus = PDF_DATAAVAIL_DONE; 160 return true; 161 } 162 return false; 163 } 164 CheckAndLoadAllXref()165 bool CPDF_DataAvail::CheckAndLoadAllXref() { 166 if (!m_pCrossRefAvail) { 167 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 168 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef(); 169 if (GetValidator()->has_read_problems()) 170 return false; 171 172 if (last_xref_offset <= 0) { 173 m_docStatus = PDF_DATAAVAIL_ERROR; 174 return false; 175 } 176 177 m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(), 178 last_xref_offset); 179 } 180 181 switch (m_pCrossRefAvail->CheckAvail()) { 182 case DocAvailStatus::DataAvailable: 183 break; 184 case DocAvailStatus::DataNotAvailable: 185 return false; 186 case DocAvailStatus::DataError: 187 m_docStatus = PDF_DATAAVAIL_ERROR; 188 return false; 189 default: 190 NOTREACHED(); 191 return false; 192 } 193 194 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) && 195 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) { 196 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 197 return false; 198 } 199 200 m_dwRootObjNum = m_parser.GetRootObjNum(); 201 m_dwInfoObjNum = m_parser.GetInfoObjNum(); 202 m_pCurrentParser = &m_parser; 203 m_docStatus = PDF_DATAAVAIL_ROOT; 204 return true; 205 } 206 GetObject(uint32_t objnum,bool * pExistInFile)207 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum, 208 bool* pExistInFile) { 209 CPDF_Parser* pParser = nullptr; 210 211 if (pExistInFile) 212 *pExistInFile = true; 213 214 pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser; 215 216 std::unique_ptr<CPDF_Object> pRet; 217 if (pParser) { 218 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 219 pRet = pParser->ParseIndirectObject(nullptr, objnum); 220 if (GetValidator()->has_read_problems()) 221 return nullptr; 222 } 223 224 if (!pRet && pExistInFile) 225 *pExistInFile = false; 226 227 return pRet; 228 } 229 CheckInfo()230 bool CPDF_DataAvail::CheckInfo() { 231 bool bExist = false; 232 std::unique_ptr<CPDF_Object> pInfo = GetObject(m_dwInfoObjNum, &bExist); 233 if (bExist && !pInfo) { 234 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 235 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 236 return true; 237 } 238 return false; 239 } 240 m_docStatus = PDF_DATAAVAIL_PAGETREE; 241 return true; 242 } 243 CheckRoot()244 bool CPDF_DataAvail::CheckRoot() { 245 bool bExist = false; 246 m_pRoot = GetObject(m_dwRootObjNum, &bExist); 247 if (!bExist) { 248 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 249 return true; 250 } 251 252 if (!m_pRoot) { 253 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 254 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 255 return true; 256 } 257 return false; 258 } 259 260 CPDF_Dictionary* pDict = m_pRoot->GetDict(); 261 if (!pDict) { 262 m_docStatus = PDF_DATAAVAIL_ERROR; 263 return false; 264 } 265 266 CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages")); 267 if (!pRef) { 268 m_docStatus = PDF_DATAAVAIL_ERROR; 269 return false; 270 } 271 272 m_PagesObjNum = pRef->GetRefObjNum(); 273 274 m_docStatus = m_dwInfoObjNum ? PDF_DATAAVAIL_INFO : PDF_DATAAVAIL_PAGETREE; 275 return true; 276 } 277 PreparePageItem()278 bool CPDF_DataAvail::PreparePageItem() { 279 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 280 CPDF_Reference* pRef = 281 ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr); 282 if (!pRef) { 283 m_docStatus = PDF_DATAAVAIL_ERROR; 284 return false; 285 } 286 287 m_PagesObjNum = pRef->GetRefObjNum(); 288 m_pCurrentParser = m_pDocument->GetParser(); 289 m_docStatus = PDF_DATAAVAIL_PAGETREE; 290 return true; 291 } 292 IsFirstCheck(uint32_t dwPage)293 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) { 294 return m_pageMapCheckState.insert(dwPage).second; 295 } 296 ResetFirstCheck(uint32_t dwPage)297 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) { 298 m_pageMapCheckState.erase(dwPage); 299 } 300 CheckPage()301 bool CPDF_DataAvail::CheckPage() { 302 std::vector<uint32_t> UnavailObjList; 303 for (uint32_t dwPageObjNum : m_PageObjList) { 304 bool bExists = false; 305 std::unique_ptr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists); 306 if (!pObj) { 307 if (bExists) 308 UnavailObjList.push_back(dwPageObjNum); 309 continue; 310 } 311 CPDF_Array* pArray = ToArray(pObj.get()); 312 if (pArray) { 313 for (const auto& pArrayObj : *pArray) { 314 if (CPDF_Reference* pRef = ToReference(pArrayObj.get())) 315 UnavailObjList.push_back(pRef->GetRefObjNum()); 316 } 317 } 318 if (!pObj->IsDictionary()) 319 continue; 320 321 ByteString type = pObj->GetDict()->GetStringFor("Type"); 322 if (type == "Pages") { 323 m_PagesArray.push_back(std::move(pObj)); 324 continue; 325 } 326 } 327 m_PageObjList.clear(); 328 if (!UnavailObjList.empty()) { 329 m_PageObjList = std::move(UnavailObjList); 330 return false; 331 } 332 size_t iPages = m_PagesArray.size(); 333 for (size_t i = 0; i < iPages; ++i) { 334 std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]); 335 if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) { 336 m_PagesArray.clear(); 337 m_docStatus = PDF_DATAAVAIL_ERROR; 338 return false; 339 } 340 } 341 m_PagesArray.clear(); 342 if (m_PageObjList.empty()) 343 m_docStatus = PDF_DATAAVAIL_DONE; 344 345 return true; 346 } 347 GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)348 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { 349 if (!pParser) { 350 m_docStatus = PDF_DATAAVAIL_ERROR; 351 return false; 352 } 353 354 CPDF_Dictionary* pDict = pPages->GetDict(); 355 CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr; 356 if (!pKids) 357 return true; 358 359 switch (pKids->GetType()) { 360 case CPDF_Object::REFERENCE: 361 m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum()); 362 break; 363 case CPDF_Object::ARRAY: { 364 CPDF_Array* pKidsArray = pKids->AsArray(); 365 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) { 366 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i))) 367 m_PageObjList.push_back(pRef->GetRefObjNum()); 368 } 369 break; 370 } 371 default: 372 m_docStatus = PDF_DATAAVAIL_ERROR; 373 return false; 374 } 375 return true; 376 } 377 CheckPages()378 bool CPDF_DataAvail::CheckPages() { 379 bool bExists = false; 380 std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists); 381 if (!bExists) { 382 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 383 return true; 384 } 385 386 if (!pPages) { 387 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 388 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 389 return true; 390 } 391 return false; 392 } 393 394 if (!GetPageKids(m_pCurrentParser, pPages.get())) { 395 m_docStatus = PDF_DATAAVAIL_ERROR; 396 return false; 397 } 398 399 m_docStatus = PDF_DATAAVAIL_PAGE; 400 return true; 401 } 402 CheckHeader()403 bool CPDF_DataAvail::CheckHeader() { 404 switch (CheckHeaderAndLinearized()) { 405 case DocAvailStatus::DataAvailable: 406 m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE 407 : PDF_DATAAVAIL_LOADALLCROSSREF; 408 return true; 409 case DocAvailStatus::DataNotAvailable: 410 return false; 411 case DocAvailStatus::DataError: 412 m_docStatus = PDF_DATAAVAIL_ERROR; 413 return true; 414 default: 415 NOTREACHED(); 416 return false; 417 } 418 } 419 CheckFirstPage()420 bool CPDF_DataAvail::CheckFirstPage() { 421 if (!m_pLinearized->GetFirstPageEndOffset() || 422 !m_pLinearized->GetFileSize() || 423 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) { 424 m_docStatus = PDF_DATAAVAIL_ERROR; 425 return false; 426 } 427 428 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); 429 dwEnd += 512; 430 if ((FX_FILESIZE)dwEnd > m_dwFileLen) 431 dwEnd = (uint32_t)m_dwFileLen; 432 433 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen; 434 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0; 435 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos, 436 data_size)) 437 return false; 438 439 m_docStatus = 440 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; 441 return true; 442 } 443 CheckHintTables()444 bool CPDF_DataAvail::CheckHintTables() { 445 if (m_pLinearized->GetPageCount() <= 1) { 446 m_docStatus = PDF_DATAAVAIL_DONE; 447 return true; 448 } 449 if (!m_pLinearized->HasHintTable()) { 450 m_docStatus = PDF_DATAAVAIL_ERROR; 451 return false; 452 } 453 454 const FX_FILESIZE szHintStart = m_pLinearized->GetHintStart(); 455 const uint32_t szHintLength = m_pLinearized->GetHintLength(); 456 457 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(szHintStart, 458 szHintLength)) 459 return false; 460 461 auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(), 462 m_pLinearized.get()); 463 std::unique_ptr<CPDF_Object> pHintStream = 464 ParseIndirectObjectAt(szHintStart, 0); 465 CPDF_Stream* pStream = ToStream(pHintStream.get()); 466 if (pStream && pHintTables->LoadHintStream(pStream)) 467 m_pHintTables = std::move(pHintTables); 468 469 m_docStatus = PDF_DATAAVAIL_DONE; 470 return true; 471 } 472 ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)473 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt( 474 FX_FILESIZE pos, 475 uint32_t objnum, 476 CPDF_IndirectObjectHolder* pObjList) { 477 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos(); 478 GetSyntaxParser()->SetPos(pos); 479 std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject( 480 pObjList, CPDF_SyntaxParser::ParseType::kLoose); 481 GetSyntaxParser()->SetPos(SavedPos); 482 return (result && (!objnum || result->GetObjNum() == objnum)) 483 ? std::move(result) 484 : nullptr; 485 } 486 IsLinearizedPDF()487 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { 488 switch (CheckHeaderAndLinearized()) { 489 case DocAvailStatus::DataAvailable: 490 return m_pLinearized ? DocLinearizationStatus::Linearized 491 : DocLinearizationStatus::NotLinearized; 492 case DocAvailStatus::DataNotAvailable: 493 return DocLinearizationStatus::LinearizationUnknown; 494 case DocAvailStatus::DataError: 495 return DocLinearizationStatus::NotLinearized; 496 default: 497 NOTREACHED(); 498 return DocLinearizationStatus::LinearizationUnknown; 499 } 500 } 501 CheckHeaderAndLinearized()502 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { 503 if (m_bHeaderAvail) 504 return DocAvailStatus::DataAvailable; 505 506 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 507 const int32_t header_offset = GetHeaderOffset(GetValidator()); 508 if (GetValidator()->has_read_problems()) 509 return DocAvailStatus::DataNotAvailable; 510 511 if (header_offset == kInvalidHeaderOffset) 512 return DocAvailStatus::DataError; 513 514 m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset); 515 m_pLinearized = m_parser.ParseLinearizedHeader(); 516 if (GetValidator()->has_read_problems()) 517 return DocAvailStatus::DataNotAvailable; 518 519 m_bHeaderAvail = true; 520 return DocAvailStatus::DataAvailable; 521 } 522 CheckPage(uint32_t dwPage)523 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) { 524 while (true) { 525 switch (m_docStatus) { 526 case PDF_DATAAVAIL_PAGETREE: 527 if (!LoadDocPages()) 528 return false; 529 break; 530 case PDF_DATAAVAIL_PAGE: 531 if (!LoadDocPage(dwPage)) 532 return false; 533 break; 534 case PDF_DATAAVAIL_ERROR: 535 return LoadAllFile(); 536 default: 537 m_bPagesTreeLoad = true; 538 m_bPagesLoad = true; 539 m_bCurPageDictLoadOK = true; 540 m_docStatus = PDF_DATAAVAIL_PAGE; 541 return true; 542 } 543 } 544 } 545 CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)546 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo, 547 PageNode* pPageNode) { 548 bool bExists = false; 549 std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists); 550 if (!bExists) { 551 m_docStatus = PDF_DATAAVAIL_ERROR; 552 return false; 553 } 554 555 if (!pPages) 556 return false; 557 558 CPDF_Array* pArray = pPages->AsArray(); 559 if (!pArray) { 560 m_docStatus = PDF_DATAAVAIL_ERROR; 561 return false; 562 } 563 564 pPageNode->m_type = PDF_PAGENODE_PAGES; 565 for (size_t i = 0; i < pArray->GetCount(); ++i) { 566 CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i)); 567 if (!pKid) 568 continue; 569 570 auto pNode = pdfium::MakeUnique<PageNode>(); 571 pNode->m_dwPageNo = pKid->GetRefObjNum(); 572 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 573 } 574 return true; 575 } 576 CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)577 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo, 578 PageNode* pPageNode) { 579 bool bExists = false; 580 std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists); 581 if (!bExists) { 582 m_docStatus = PDF_DATAAVAIL_ERROR; 583 return false; 584 } 585 586 if (!pPage) 587 return false; 588 589 if (pPage->IsArray()) { 590 pPageNode->m_dwPageNo = dwPageNo; 591 pPageNode->m_type = PDF_PAGENODE_ARRAY; 592 return true; 593 } 594 595 if (!pPage->IsDictionary()) { 596 m_docStatus = PDF_DATAAVAIL_ERROR; 597 return false; 598 } 599 600 pPageNode->m_dwPageNo = dwPageNo; 601 CPDF_Dictionary* pDict = pPage->GetDict(); 602 const ByteString type = pDict->GetStringFor("Type"); 603 if (type == "Page") { 604 pPageNode->m_type = PDF_PAGENODE_PAGE; 605 return true; 606 } 607 608 if (type != "Pages") { 609 m_docStatus = PDF_DATAAVAIL_ERROR; 610 return false; 611 } 612 613 pPageNode->m_type = PDF_PAGENODE_PAGES; 614 CPDF_Object* pKids = pDict->GetObjectFor("Kids"); 615 if (!pKids) { 616 m_docStatus = PDF_DATAAVAIL_PAGE; 617 return true; 618 } 619 620 switch (pKids->GetType()) { 621 case CPDF_Object::REFERENCE: { 622 CPDF_Reference* pKid = pKids->AsReference(); 623 auto pNode = pdfium::MakeUnique<PageNode>(); 624 pNode->m_dwPageNo = pKid->GetRefObjNum(); 625 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 626 break; 627 } 628 case CPDF_Object::ARRAY: { 629 CPDF_Array* pKidsArray = pKids->AsArray(); 630 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) { 631 CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i)); 632 if (!pKid) 633 continue; 634 635 auto pNode = pdfium::MakeUnique<PageNode>(); 636 pNode->m_dwPageNo = pKid->GetRefObjNum(); 637 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 638 } 639 break; 640 } 641 default: 642 break; 643 } 644 return true; 645 } 646 CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)647 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, 648 int32_t iPage, 649 int32_t& iCount, 650 int level) { 651 if (level >= kMaxPageRecursionDepth) 652 return false; 653 654 int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes); 655 if (iSize <= 0 || iPage >= iSize) { 656 m_docStatus = PDF_DATAAVAIL_ERROR; 657 return false; 658 } 659 for (int32_t i = 0; i < iSize; ++i) { 660 PageNode* pNode = pageNode.m_ChildNodes[i].get(); 661 if (!pNode) 662 continue; 663 664 if (pNode->m_type == PDF_PAGENODE_UNKNOWN) { 665 // Updates the type for the unknown page node. 666 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode)) 667 return false; 668 } 669 if (pNode->m_type == PDF_PAGENODE_ARRAY) { 670 // Updates a more specific type for the array page node. 671 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode)) 672 return false; 673 } 674 switch (pNode->m_type) { 675 case PDF_PAGENODE_PAGE: 676 iCount++; 677 if (iPage == iCount && m_pDocument) 678 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo); 679 break; 680 case PDF_PAGENODE_PAGES: 681 if (!CheckPageNode(*pNode, iPage, iCount, level + 1)) 682 return false; 683 break; 684 case PDF_PAGENODE_UNKNOWN: 685 case PDF_PAGENODE_ARRAY: 686 // Already converted above, error if we get here. 687 return false; 688 } 689 if (iPage == iCount) { 690 m_docStatus = PDF_DATAAVAIL_DONE; 691 return true; 692 } 693 } 694 return true; 695 } 696 LoadDocPage(uint32_t dwPage)697 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) { 698 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 699 int32_t iPage = safePage.ValueOrDie(); 700 if (m_pDocument->GetPageCount() <= iPage || 701 m_pDocument->IsPageLoaded(iPage)) { 702 m_docStatus = PDF_DATAAVAIL_DONE; 703 return true; 704 } 705 if (m_PageNode.m_type == PDF_PAGENODE_PAGE) { 706 m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR; 707 return true; 708 } 709 int32_t iCount = -1; 710 return CheckPageNode(m_PageNode, iPage, iCount, 0); 711 } 712 CheckPageCount()713 bool CPDF_DataAvail::CheckPageCount() { 714 bool bExists = false; 715 std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists); 716 if (!bExists) { 717 m_docStatus = PDF_DATAAVAIL_ERROR; 718 return false; 719 } 720 if (!pPages) 721 return false; 722 723 CPDF_Dictionary* pPagesDict = pPages->GetDict(); 724 if (!pPagesDict) { 725 m_docStatus = PDF_DATAAVAIL_ERROR; 726 return false; 727 } 728 if (!pPagesDict->KeyExist("Kids")) 729 return true; 730 731 return pPagesDict->GetIntegerFor("Count") > 0; 732 } 733 LoadDocPages()734 bool CPDF_DataAvail::LoadDocPages() { 735 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode)) 736 return false; 737 738 if (CheckPageCount()) { 739 m_docStatus = PDF_DATAAVAIL_PAGE; 740 return true; 741 } 742 743 m_bTotalLoadPageTree = true; 744 return false; 745 } 746 LoadPages()747 bool CPDF_DataAvail::LoadPages() { 748 while (!m_bPagesTreeLoad) { 749 if (!CheckPageStatus()) 750 return false; 751 } 752 753 if (m_bPagesLoad) 754 return true; 755 756 m_pDocument->LoadPages(); 757 return false; 758 } 759 CheckLinearizedData()760 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() { 761 if (m_bLinearedDataOK) 762 return DataAvailable; 763 ASSERT(m_pLinearized); 764 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument || 765 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) { 766 return DataError; 767 } 768 769 if (!m_bMainXRefLoadTried) { 770 const FX_SAFE_FILESIZE main_xref_offset = 771 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev"); 772 if (!main_xref_offset.IsValid()) 773 return DataError; 774 775 if (main_xref_offset.ValueOrDie() == 0) 776 return DataAvailable; 777 778 FX_SAFE_SIZE_T data_size = m_dwFileLen; 779 data_size -= main_xref_offset.ValueOrDie(); 780 if (!data_size.IsValid()) 781 return DataError; 782 783 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( 784 main_xref_offset.ValueOrDie(), data_size.ValueOrDie())) 785 return DataNotAvailable; 786 787 CPDF_Parser::Error eRet = 788 m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); 789 m_bMainXRefLoadTried = true; 790 if (eRet != CPDF_Parser::SUCCESS) 791 return DataError; 792 793 if (!PreparePageItem()) 794 return DataNotAvailable; 795 796 m_bMainXRefLoadedOK = true; 797 m_bLinearedDataOK = true; 798 } 799 800 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable; 801 } 802 IsPageAvail(uint32_t dwPage,DownloadHints * pHints)803 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( 804 uint32_t dwPage, 805 DownloadHints* pHints) { 806 if (!m_pDocument) 807 return DataError; 808 809 const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 810 if (!safePage.IsValid()) 811 return DataError; 812 813 if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) { 814 // This is XFA page. 815 return DataAvailable; 816 } 817 818 if (IsFirstCheck(dwPage)) { 819 m_bCurPageDictLoadOK = false; 820 } 821 822 if (pdfium::ContainsKey(m_pagesLoadState, dwPage)) 823 return DataAvailable; 824 825 const HintsScope hints_scope(GetValidator().Get(), pHints); 826 827 if (m_pLinearized) { 828 if (dwPage == m_pLinearized->GetFirstPageNo()) { 829 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 830 if (!pPageDict) 831 return DataError; 832 833 auto page_num_obj = std::make_pair( 834 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>( 835 GetValidator().Get(), m_pDocument, pPageDict)); 836 837 CPDF_PageObjectAvail* page_obj_avail = 838 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get(); 839 // TODO(art-snake): Check resources. 840 return page_obj_avail->CheckAvail(); 841 } 842 843 DocAvailStatus nResult = CheckLinearizedData(); 844 if (nResult != DataAvailable) 845 return nResult; 846 847 if (m_pHintTables) { 848 nResult = m_pHintTables->CheckPage(dwPage); 849 if (nResult != DataAvailable) 850 return nResult; 851 if (GetPage(dwPage)) { 852 m_pagesLoadState.insert(dwPage); 853 return DataAvailable; 854 } 855 } 856 857 if (!m_bMainXRefLoadedOK) { 858 if (!LoadAllFile()) 859 return DataNotAvailable; 860 m_pDocument->GetParser()->RebuildCrossRef(); 861 ResetFirstCheck(dwPage); 862 return DataAvailable; 863 } 864 if (m_bTotalLoadPageTree) { 865 if (!LoadPages()) 866 return DataNotAvailable; 867 } else { 868 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage)) 869 return DataNotAvailable; 870 } 871 } else { 872 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) { 873 return DataNotAvailable; 874 } 875 } 876 877 if (CheckAcroForm() == DocFormStatus::FormNotAvailable) 878 return DataNotAvailable; 879 880 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 881 if (!pPageDict) 882 return DataError; 883 884 { 885 auto page_num_obj = std::make_pair( 886 dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>( 887 GetValidator().Get(), m_pDocument, pPageDict)); 888 CPDF_PageObjectAvail* page_obj_avail = 889 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get(); 890 const DocAvailStatus status = page_obj_avail->CheckAvail(); 891 if (status != DocAvailStatus::DataAvailable) 892 return status; 893 } 894 895 const DocAvailStatus resources_status = CheckResources(pPageDict); 896 if (resources_status != DocAvailStatus::DataAvailable) 897 return resources_status; 898 899 m_bCurPageDictLoadOK = false; 900 ResetFirstCheck(dwPage); 901 m_pagesLoadState.insert(dwPage); 902 return DataAvailable; 903 } 904 CheckResources(const CPDF_Dictionary * page)905 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources( 906 const CPDF_Dictionary* page) { 907 ASSERT(page); 908 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 909 const CPDF_Object* resources = GetResourceObject(page); 910 if (GetValidator()->has_read_problems()) 911 return DocAvailStatus::DataNotAvailable; 912 913 if (!resources) 914 return DocAvailStatus::DataAvailable; 915 916 CPDF_PageObjectAvail* resource_avail = 917 m_PagesResourcesAvail 918 .insert(std::make_pair( 919 resources, pdfium::MakeUnique<CPDF_PageObjectAvail>( 920 GetValidator().Get(), m_pDocument, resources))) 921 .first->second.get(); 922 return resource_avail->CheckAvail(); 923 } 924 GetFileRead() const925 RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const { 926 return m_pFileRead; 927 } 928 GetValidator() const929 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const { 930 return m_pFileRead; 931 } 932 GetSyntaxParser() const933 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const { 934 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get() 935 : m_parser.m_pSyntax.get(); 936 } 937 GetPageCount() const938 int CPDF_DataAvail::GetPageCount() const { 939 if (m_pLinearized) 940 return m_pLinearized->GetPageCount(); 941 return m_pDocument ? m_pDocument->GetPageCount() : 0; 942 } 943 GetPage(int index)944 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { 945 if (!m_pDocument || index < 0 || index >= GetPageCount()) 946 return nullptr; 947 CPDF_Dictionary* page = m_pDocument->GetPage(index); 948 if (page) 949 return page; 950 if (!m_pLinearized || !m_pHintTables) 951 return nullptr; 952 953 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo())) 954 return nullptr; 955 FX_FILESIZE szPageStartPos = 0; 956 FX_FILESIZE szPageLength = 0; 957 uint32_t dwObjNum = 0; 958 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos, 959 &szPageLength, &dwObjNum); 960 if (!bPagePosGot || !dwObjNum) 961 return nullptr; 962 // We should say to the document, which object is the page. 963 m_pDocument->SetPageObjNum(index, dwObjNum); 964 // Page object already can be parsed in document. 965 if (!m_pDocument->GetIndirectObject(dwObjNum)) { 966 m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 967 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument)); 968 } 969 if (!ValidatePage(index)) 970 return nullptr; 971 return m_pDocument->GetPage(index); 972 } 973 IsFormAvail(DownloadHints * pHints)974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( 975 DownloadHints* pHints) { 976 const HintsScope hints_scope(GetValidator().Get(), pHints); 977 return CheckAcroForm(); 978 } 979 CheckAcroForm()980 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() { 981 if (!m_pDocument) 982 return FormAvailable; 983 984 if (m_pLinearized) { 985 DocAvailStatus nDocStatus = CheckLinearizedData(); 986 if (nDocStatus == DataError) 987 return FormError; 988 if (nDocStatus == DataNotAvailable) 989 return FormNotAvailable; 990 } 991 992 if (!m_pFormAvail) { 993 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 994 if (!pRoot) 995 return FormAvailable; 996 997 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); 998 if (!pAcroForm) 999 return FormNotExist; 1000 1001 m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>( 1002 GetValidator().Get(), m_pDocument, pAcroForm); 1003 } 1004 switch (m_pFormAvail->CheckAvail()) { 1005 case DocAvailStatus::DataError: 1006 return DocFormStatus::FormError; 1007 case DocAvailStatus::DataNotAvailable: 1008 return DocFormStatus::FormNotAvailable; 1009 case DocAvailStatus::DataAvailable: 1010 return DocFormStatus::FormAvailable; 1011 default: 1012 NOTREACHED(); 1013 } 1014 return DocFormStatus::FormError; 1015 } 1016 ValidatePage(uint32_t dwPage)1017 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) { 1018 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1019 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1020 if (!pPageDict) 1021 return false; 1022 CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict); 1023 return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable; 1024 } 1025 1026 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(const char * password)1027 CPDF_DataAvail::ParseDocument(const char* password) { 1028 if (m_pDocument) { 1029 // We already returned parsed document. 1030 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr); 1031 } 1032 auto parser = pdfium::MakeUnique<CPDF_Parser>(); 1033 parser->SetPassword(password); 1034 auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser)); 1035 1036 CPDF_ReadValidator::Session read_session(GetValidator().Get()); 1037 CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse( 1038 GetFileRead(), document.get()); 1039 1040 // Additional check, that all ok. 1041 if (GetValidator()->has_read_problems()) { 1042 NOTREACHED(); 1043 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr); 1044 } 1045 1046 if (error != CPDF_Parser::SUCCESS) 1047 return std::make_pair(error, nullptr); 1048 1049 m_pDocument = document.get(); 1050 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document)); 1051 } 1052 PageNode()1053 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} 1054 ~PageNode()1055 CPDF_DataAvail::PageNode::~PageNode() {} 1056