1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
24 #include "core/fxcrt/fx_ext.h"
25 #include "core/fxcrt/fx_safe_types.h"
26 #include "third_party/base/numerics/safe_conversions.h"
27 #include "third_party/base/ptr_util.h"
28 #include "third_party/base/stl_util.h"
29
~FileAvail()30 CPDF_DataAvail::FileAvail::~FileAvail() {}
31
~DownloadHints()32 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
33
34 // static
35 int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
36
CPDF_DataAvail(FileAvail * pFileAvail,const CFX_RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)37 CPDF_DataAvail::CPDF_DataAvail(
38 FileAvail* pFileAvail,
39 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
40 bool bSupportHintTable)
41 : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {
42 m_Pos = 0;
43 m_dwFileLen = 0;
44 if (m_pFileRead) {
45 m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
46 }
47 m_dwCurrentOffset = 0;
48 m_dwXRefOffset = 0;
49 m_dwTrailerOffset = 0;
50 m_bufferOffset = 0;
51 m_bufferSize = 0;
52 m_PagesObjNum = 0;
53 m_dwCurrentXRefSteam = 0;
54 m_dwAcroFormObjNum = 0;
55 m_dwInfoObjNum = 0;
56 m_pDocument = 0;
57 m_dwEncryptObjNum = 0;
58 m_dwPrevXRefOffset = 0;
59 m_dwLastXRefOffset = 0;
60 m_bDocAvail = false;
61 m_bMainXRefLoadTried = false;
62 m_bDocAvail = false;
63 m_bPagesLoad = false;
64 m_bPagesTreeLoad = false;
65 m_bMainXRefLoadedOK = false;
66 m_bAnnotsLoad = false;
67 m_bHaveAcroForm = false;
68 m_bAcroFormLoad = false;
69 m_bPageLoadedOK = false;
70 m_bNeedDownLoadResource = false;
71 m_bLinearizedFormParamLoad = false;
72 m_pTrailer = nullptr;
73 m_pCurrentParser = nullptr;
74 m_pAcroForm = nullptr;
75 m_pPageDict = nullptr;
76 m_pPageResource = nullptr;
77 m_docStatus = PDF_DATAAVAIL_HEADER;
78 m_bTotalLoadPageTree = false;
79 m_bCurPageDictLoadOK = false;
80 m_bLinearedDataOK = false;
81 m_bSupportHintTable = bSupportHintTable;
82 }
83
~CPDF_DataAvail()84 CPDF_DataAvail::~CPDF_DataAvail() {
85 m_pHintTables.reset();
86 for (CPDF_Object* pObject : m_arrayAcroforms)
87 delete pObject;
88 }
89
SetDocument(CPDF_Document * pDoc)90 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) {
91 m_pDocument = pDoc;
92 }
93
GetObjectSize(uint32_t objnum,FX_FILESIZE & offset)94 uint32_t CPDF_DataAvail::GetObjectSize(uint32_t objnum, FX_FILESIZE& offset) {
95 CPDF_Parser* pParser = m_pDocument->GetParser();
96 if (!pParser || !pParser->IsValidObjectNumber(objnum))
97 return 0;
98
99 if (pParser->GetObjectType(objnum) == 2)
100 objnum = pParser->GetObjectPositionOrZero(objnum);
101
102 if (pParser->GetObjectType(objnum) != 1 &&
103 pParser->GetObjectType(objnum) != 255) {
104 return 0;
105 }
106
107 offset = pParser->GetObjectPositionOrZero(objnum);
108 if (offset == 0)
109 return 0;
110
111 auto it = pParser->m_SortedOffset.find(offset);
112 if (it == pParser->m_SortedOffset.end() ||
113 ++it == pParser->m_SortedOffset.end()) {
114 return 0;
115 }
116 return *it - offset;
117 }
118
AreObjectsAvailable(std::vector<CPDF_Object * > & obj_array,bool bParsePage,DownloadHints * pHints,std::vector<CPDF_Object * > & ret_array)119 bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array,
120 bool bParsePage,
121 DownloadHints* pHints,
122 std::vector<CPDF_Object*>& ret_array) {
123 if (obj_array.empty())
124 return true;
125
126 uint32_t count = 0;
127 std::vector<CPDF_Object*> new_obj_array;
128 for (CPDF_Object* pObj : obj_array) {
129 if (!pObj)
130 continue;
131
132 int32_t type = pObj->GetType();
133 switch (type) {
134 case CPDF_Object::ARRAY: {
135 CPDF_Array* pArray = pObj->AsArray();
136 for (size_t k = 0; k < pArray->GetCount(); ++k)
137 new_obj_array.push_back(pArray->GetObjectAt(k));
138 } break;
139 case CPDF_Object::STREAM:
140 pObj = pObj->GetDict();
141 case CPDF_Object::DICTIONARY: {
142 CPDF_Dictionary* pDict = pObj->GetDict();
143 if (pDict && pDict->GetStringFor("Type") == "Page" && !bParsePage)
144 continue;
145
146 for (const auto& it : *pDict) {
147 if (it.first != "Parent")
148 new_obj_array.push_back(it.second.get());
149 }
150 } break;
151 case CPDF_Object::REFERENCE: {
152 CPDF_Reference* pRef = pObj->AsReference();
153 uint32_t dwNum = pRef->GetRefObjNum();
154
155 FX_FILESIZE offset;
156 uint32_t size = GetObjectSize(dwNum, offset);
157 if (size == 0 || offset < 0 || offset >= m_dwFileLen)
158 break;
159
160 if (!IsDataAvail(offset, size, pHints)) {
161 ret_array.push_back(pObj);
162 count++;
163 } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) {
164 m_ObjectSet.insert(dwNum);
165 CPDF_Object* pReferred =
166 m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
167 if (pReferred)
168 new_obj_array.push_back(pReferred);
169 }
170 } break;
171 }
172 }
173
174 if (count > 0) {
175 for (CPDF_Object* pObj : new_obj_array) {
176 CPDF_Reference* pRef = pObj->AsReference();
177 if (pRef && pdfium::ContainsKey(m_ObjectSet, pRef->GetRefObjNum()))
178 continue;
179 ret_array.push_back(pObj);
180 }
181 return false;
182 }
183
184 obj_array = new_obj_array;
185 return AreObjectsAvailable(obj_array, false, pHints, ret_array);
186 }
187
IsDocAvail(DownloadHints * pHints)188 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
189 DownloadHints* pHints) {
190 if (!m_dwFileLen && m_pFileRead) {
191 m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
192 if (!m_dwFileLen)
193 return DataError;
194 }
195
196 while (!m_bDocAvail) {
197 if (!CheckDocStatus(pHints))
198 return DataNotAvailable;
199 }
200
201 return DataAvailable;
202 }
203
CheckAcroFormSubObject(DownloadHints * pHints)204 bool CPDF_DataAvail::CheckAcroFormSubObject(DownloadHints* pHints) {
205 if (m_objs_array.empty()) {
206 m_ObjectSet.clear();
207 std::vector<CPDF_Object*> obj_array = m_arrayAcroforms;
208 if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
209 return false;
210
211 m_objs_array.clear();
212 return true;
213 }
214
215 std::vector<CPDF_Object*> new_objs_array;
216 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
217 m_objs_array = new_objs_array;
218 return false;
219 }
220
221 for (CPDF_Object* pObject : m_arrayAcroforms)
222 delete pObject;
223
224 m_arrayAcroforms.clear();
225 return true;
226 }
227
CheckAcroForm(DownloadHints * pHints)228 bool CPDF_DataAvail::CheckAcroForm(DownloadHints* pHints) {
229 bool bExist = false;
230 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist).release();
231 if (!bExist) {
232 m_docStatus = PDF_DATAAVAIL_PAGETREE;
233 return true;
234 }
235
236 if (!m_pAcroForm) {
237 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
238 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
239 return true;
240 }
241 return false;
242 }
243
244 m_arrayAcroforms.push_back(m_pAcroForm);
245 m_docStatus = PDF_DATAAVAIL_PAGETREE;
246 return true;
247 }
248
CheckDocStatus(DownloadHints * pHints)249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
250 switch (m_docStatus) {
251 case PDF_DATAAVAIL_HEADER:
252 return CheckHeader(pHints);
253 case PDF_DATAAVAIL_FIRSTPAGE:
254 return CheckFirstPage(pHints);
255 case PDF_DATAAVAIL_HINTTABLE:
256 return CheckHintTables(pHints);
257 case PDF_DATAAVAIL_END:
258 return CheckEnd(pHints);
259 case PDF_DATAAVAIL_CROSSREF:
260 return CheckCrossRef(pHints);
261 case PDF_DATAAVAIL_CROSSREF_ITEM:
262 return CheckCrossRefItem(pHints);
263 case PDF_DATAAVAIL_CROSSREF_STREAM:
264 return CheckAllCrossRefStream(pHints);
265 case PDF_DATAAVAIL_TRAILER:
266 return CheckTrailer(pHints);
267 case PDF_DATAAVAIL_TRAILER_APPEND:
268 return CheckTrailerAppend(pHints);
269 case PDF_DATAAVAIL_LOADALLCROSSREF:
270 return LoadAllXref(pHints);
271 case PDF_DATAAVAIL_LOADALLFILE:
272 return LoadAllFile(pHints);
273 case PDF_DATAAVAIL_ROOT:
274 return CheckRoot(pHints);
275 case PDF_DATAAVAIL_INFO:
276 return CheckInfo(pHints);
277 case PDF_DATAAVAIL_ACROFORM:
278 return CheckAcroForm(pHints);
279 case PDF_DATAAVAIL_PAGETREE:
280 if (m_bTotalLoadPageTree)
281 return CheckPages(pHints);
282 return LoadDocPages(pHints);
283 case PDF_DATAAVAIL_PAGE:
284 if (m_bTotalLoadPageTree)
285 return CheckPage(pHints);
286 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
287 return true;
288 case PDF_DATAAVAIL_ERROR:
289 return LoadAllFile(pHints);
290 case PDF_DATAAVAIL_PAGE_LATERLOAD:
291 m_docStatus = PDF_DATAAVAIL_PAGE;
292 default:
293 m_bDocAvail = true;
294 return true;
295 }
296 }
297
CheckPageStatus(DownloadHints * pHints)298 bool CPDF_DataAvail::CheckPageStatus(DownloadHints* pHints) {
299 switch (m_docStatus) {
300 case PDF_DATAAVAIL_PAGETREE:
301 return CheckPages(pHints);
302 case PDF_DATAAVAIL_PAGE:
303 return CheckPage(pHints);
304 case PDF_DATAAVAIL_ERROR:
305 return LoadAllFile(pHints);
306 default:
307 m_bPagesTreeLoad = true;
308 m_bPagesLoad = true;
309 return true;
310 }
311 }
312
LoadAllFile(DownloadHints * pHints)313 bool CPDF_DataAvail::LoadAllFile(DownloadHints* pHints) {
314 if (m_pFileAvail->IsDataAvail(0, (uint32_t)m_dwFileLen)) {
315 m_docStatus = PDF_DATAAVAIL_DONE;
316 return true;
317 }
318
319 pHints->AddSegment(0, (uint32_t)m_dwFileLen);
320 return false;
321 }
322
LoadAllXref(DownloadHints * pHints)323 bool CPDF_DataAvail::LoadAllXref(DownloadHints* pHints) {
324 m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset);
325 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
326 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
327 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
328 return false;
329 }
330
331 m_dwRootObjNum = m_parser.GetRootObjNum();
332 m_dwInfoObjNum = m_parser.GetInfoObjNum();
333 m_pCurrentParser = &m_parser;
334 m_docStatus = PDF_DATAAVAIL_ROOT;
335 return true;
336 }
337
GetObject(uint32_t objnum,DownloadHints * pHints,bool * pExistInFile)338 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
339 DownloadHints* pHints,
340 bool* pExistInFile) {
341 uint32_t size = 0;
342 FX_FILESIZE offset = 0;
343 CPDF_Parser* pParser = nullptr;
344
345 if (pExistInFile)
346 *pExistInFile = true;
347
348 if (m_pDocument) {
349 size = GetObjectSize(objnum, offset);
350 pParser = m_pDocument->GetParser();
351 } else {
352 size = (uint32_t)m_parser.GetObjectSize(objnum);
353 offset = m_parser.GetObjectOffset(objnum);
354 pParser = &m_parser;
355 }
356
357 if (!IsDataAvail(offset, size, pHints))
358 return nullptr;
359
360 std::unique_ptr<CPDF_Object> pRet;
361 if (pParser)
362 pRet = pParser->ParseIndirectObject(nullptr, objnum);
363
364 if (!pRet && pExistInFile)
365 *pExistInFile = false;
366
367 return pRet;
368 }
369
CheckInfo(DownloadHints * pHints)370 bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) {
371 bool bExist = false;
372 std::unique_ptr<CPDF_Object> pInfo =
373 GetObject(m_dwInfoObjNum, pHints, &bExist);
374 if (bExist && !pInfo) {
375 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
376 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
377 return true;
378 }
379 if (m_Pos == m_dwFileLen)
380 m_docStatus = PDF_DATAAVAIL_ERROR;
381 return false;
382 }
383 m_docStatus =
384 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
385 return true;
386 }
387
CheckRoot(DownloadHints * pHints)388 bool CPDF_DataAvail::CheckRoot(DownloadHints* pHints) {
389 bool bExist = false;
390 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
391 if (!bExist) {
392 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
393 return true;
394 }
395
396 if (!m_pRoot) {
397 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
398 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
399 return true;
400 }
401 return false;
402 }
403
404 CPDF_Dictionary* pDict = m_pRoot->GetDict();
405 if (!pDict) {
406 m_docStatus = PDF_DATAAVAIL_ERROR;
407 return false;
408 }
409
410 CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
411 if (!pRef) {
412 m_docStatus = PDF_DATAAVAIL_ERROR;
413 return false;
414 }
415
416 m_PagesObjNum = pRef->GetRefObjNum();
417 CPDF_Reference* pAcroFormRef =
418 ToReference(m_pRoot->GetDict()->GetObjectFor("AcroForm"));
419 if (pAcroFormRef) {
420 m_bHaveAcroForm = true;
421 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
422 }
423
424 if (m_dwInfoObjNum) {
425 m_docStatus = PDF_DATAAVAIL_INFO;
426 } else {
427 m_docStatus =
428 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
429 }
430 return true;
431 }
432
PreparePageItem()433 bool CPDF_DataAvail::PreparePageItem() {
434 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
435 CPDF_Reference* pRef =
436 ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
437 if (!pRef) {
438 m_docStatus = PDF_DATAAVAIL_ERROR;
439 return false;
440 }
441
442 m_PagesObjNum = pRef->GetRefObjNum();
443 m_pCurrentParser = m_pDocument->GetParser();
444 m_docStatus = PDF_DATAAVAIL_PAGETREE;
445 return true;
446 }
447
IsFirstCheck(uint32_t dwPage)448 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
449 return m_pageMapCheckState.insert(dwPage).second;
450 }
451
ResetFirstCheck(uint32_t dwPage)452 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
453 m_pageMapCheckState.erase(dwPage);
454 }
455
CheckPage(DownloadHints * pHints)456 bool CPDF_DataAvail::CheckPage(DownloadHints* pHints) {
457 std::vector<uint32_t> UnavailObjList;
458 for (uint32_t dwPageObjNum : m_PageObjList) {
459 bool bExists = false;
460 std::unique_ptr<CPDF_Object> pObj =
461 GetObject(dwPageObjNum, pHints, &bExists);
462 if (!pObj) {
463 if (bExists)
464 UnavailObjList.push_back(dwPageObjNum);
465 continue;
466 }
467 CPDF_Array* pArray = ToArray(pObj.get());
468 if (pArray) {
469 for (const auto& pArrayObj : *pArray) {
470 if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
471 UnavailObjList.push_back(pRef->GetRefObjNum());
472 }
473 }
474 if (!pObj->IsDictionary())
475 continue;
476
477 CFX_ByteString type = pObj->GetDict()->GetStringFor("Type");
478 if (type == "Pages") {
479 m_PagesArray.push_back(std::move(pObj));
480 continue;
481 }
482 }
483 m_PageObjList.clear();
484 if (!UnavailObjList.empty()) {
485 m_PageObjList = std::move(UnavailObjList);
486 return false;
487 }
488 size_t iPages = m_PagesArray.size();
489 for (size_t i = 0; i < iPages; ++i) {
490 std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
491 if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
492 m_PagesArray.clear();
493 m_docStatus = PDF_DATAAVAIL_ERROR;
494 return false;
495 }
496 }
497 m_PagesArray.clear();
498 if (m_PageObjList.empty())
499 m_docStatus = PDF_DATAAVAIL_DONE;
500
501 return true;
502 }
503
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)504 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
505 if (!pParser) {
506 m_docStatus = PDF_DATAAVAIL_ERROR;
507 return false;
508 }
509
510 CPDF_Dictionary* pDict = pPages->GetDict();
511 CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
512 if (!pKids)
513 return true;
514
515 switch (pKids->GetType()) {
516 case CPDF_Object::REFERENCE:
517 m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
518 break;
519 case CPDF_Object::ARRAY: {
520 CPDF_Array* pKidsArray = pKids->AsArray();
521 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
522 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
523 m_PageObjList.push_back(pRef->GetRefObjNum());
524 }
525 } break;
526 default:
527 m_docStatus = PDF_DATAAVAIL_ERROR;
528 return false;
529 }
530 return true;
531 }
532
CheckPages(DownloadHints * pHints)533 bool CPDF_DataAvail::CheckPages(DownloadHints* pHints) {
534 bool bExists = false;
535 std::unique_ptr<CPDF_Object> pPages =
536 GetObject(m_PagesObjNum, pHints, &bExists);
537 if (!bExists) {
538 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
539 return true;
540 }
541
542 if (!pPages) {
543 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
544 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
545 return true;
546 }
547 return false;
548 }
549
550 if (!GetPageKids(m_pCurrentParser, pPages.get())) {
551 m_docStatus = PDF_DATAAVAIL_ERROR;
552 return false;
553 }
554
555 m_docStatus = PDF_DATAAVAIL_PAGE;
556 return true;
557 }
558
CheckHeader(DownloadHints * pHints)559 bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) {
560 ASSERT(m_dwFileLen >= 0);
561 const uint32_t kReqSize = std::min(static_cast<uint32_t>(m_dwFileLen), 1024U);
562
563 if (m_pFileAvail->IsDataAvail(0, kReqSize)) {
564 uint8_t buffer[1024];
565 m_pFileRead->ReadBlock(buffer, 0, kReqSize);
566
567 if (IsLinearizedFile(buffer, kReqSize)) {
568 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
569 } else {
570 if (m_docStatus == PDF_DATAAVAIL_ERROR)
571 return false;
572 m_docStatus = PDF_DATAAVAIL_END;
573 }
574 return true;
575 }
576
577 pHints->AddSegment(0, kReqSize);
578 return false;
579 }
580
CheckFirstPage(DownloadHints * pHints)581 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
582 if (!m_pLinearized->GetFirstPageEndOffset() ||
583 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
584 m_docStatus = PDF_DATAAVAIL_ERROR;
585 return false;
586 }
587
588 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
589 dwEnd += 512;
590 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
591 dwEnd = (uint32_t)m_dwFileLen;
592
593 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
594 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
595 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
596 pHints->AddSegment(iStartPos, iSize);
597 return false;
598 }
599
600 m_docStatus =
601 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
602 return true;
603 }
604
IsDataAvail(FX_FILESIZE offset,uint32_t size,DownloadHints * pHints)605 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
606 uint32_t size,
607 DownloadHints* pHints) {
608 if (offset < 0 || offset > m_dwFileLen)
609 return true;
610
611 FX_SAFE_FILESIZE safeSize = offset;
612 safeSize += size;
613 safeSize += 512;
614 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
615 size = m_dwFileLen - offset;
616 else
617 size += 512;
618
619 if (!m_pFileAvail->IsDataAvail(offset, size)) {
620 if (pHints)
621 pHints->AddSegment(offset, size);
622 return false;
623 }
624 return true;
625 }
626
CheckHintTables(DownloadHints * pHints)627 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
628 if (m_pLinearized->GetPageCount() <= 1) {
629 m_docStatus = PDF_DATAAVAIL_DONE;
630 return true;
631 }
632 if (!m_pLinearized->HasHintTable()) {
633 m_docStatus = PDF_DATAAVAIL_ERROR;
634 return false;
635 }
636
637 FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
638 FX_FILESIZE szHintLength = m_pLinearized->GetHintLength();
639
640 if (!IsDataAvail(szHintStart, szHintLength, pHints))
641 return false;
642
643 m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
644
645 std::unique_ptr<CPDF_HintTables> pHintTables(
646 new CPDF_HintTables(this, m_pLinearized.get()));
647 std::unique_ptr<CPDF_Object> pHintStream(
648 ParseIndirectObjectAt(szHintStart, 0));
649 CPDF_Stream* pStream = ToStream(pHintStream.get());
650 if (pStream && pHintTables->LoadHintStream(pStream))
651 m_pHintTables = std::move(pHintTables);
652
653 m_docStatus = PDF_DATAAVAIL_DONE;
654 return true;
655 }
656
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)657 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
658 FX_FILESIZE pos,
659 uint32_t objnum,
660 CPDF_IndirectObjectHolder* pObjList) {
661 FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
662 m_syntaxParser.RestorePos(pos);
663
664 bool bIsNumber;
665 CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber);
666 if (!bIsNumber)
667 return nullptr;
668
669 uint32_t parser_objnum = FXSYS_atoui(word.c_str());
670 if (objnum && parser_objnum != objnum)
671 return nullptr;
672
673 word = m_syntaxParser.GetNextWord(&bIsNumber);
674 if (!bIsNumber)
675 return nullptr;
676
677 uint32_t gennum = FXSYS_atoui(word.c_str());
678 if (m_syntaxParser.GetKeyword() != "obj") {
679 m_syntaxParser.RestorePos(SavedPos);
680 return nullptr;
681 }
682
683 std::unique_ptr<CPDF_Object> pObj =
684 m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true);
685 m_syntaxParser.RestorePos(SavedPos);
686 return pObj;
687 }
688
IsLinearizedPDF()689 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
690 const uint32_t kReqSize = 1024;
691 if (!m_pFileAvail->IsDataAvail(0, kReqSize))
692 return LinearizationUnknown;
693
694 if (!m_pFileRead)
695 return NotLinearized;
696
697 FX_FILESIZE dwSize = m_pFileRead->GetSize();
698 if (dwSize < (FX_FILESIZE)kReqSize)
699 return LinearizationUnknown;
700
701 uint8_t buffer[1024];
702 m_pFileRead->ReadBlock(buffer, 0, kReqSize);
703 if (IsLinearizedFile(buffer, kReqSize))
704 return Linearized;
705
706 return NotLinearized;
707 }
708
IsLinearized()709 bool CPDF_DataAvail::IsLinearized() {
710 return !!m_pLinearized;
711 }
712
IsLinearizedFile(uint8_t * pData,uint32_t dwLen)713 bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
714 if (m_pLinearized)
715 return true;
716
717 CFX_RetainPtr<IFX_MemoryStream> file =
718 IFX_MemoryStream::Create(pData, (size_t)dwLen, false);
719 int32_t offset = GetHeaderOffset(file);
720 if (offset == -1) {
721 m_docStatus = PDF_DATAAVAIL_ERROR;
722 return false;
723 }
724
725 m_dwHeaderOffset = offset;
726 m_syntaxParser.InitParser(file, offset);
727 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
728
729 bool bNumber;
730 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
731 if (!bNumber)
732 return false;
733
734 uint32_t objnum = FXSYS_atoui(wordObjNum.c_str());
735 m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
736 ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum));
737 if (!m_pLinearized ||
738 m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) {
739 m_pLinearized.reset();
740 return false;
741 }
742 return true;
743 }
744
CheckEnd(DownloadHints * pHints)745 bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) {
746 uint32_t req_pos = (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
747 uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos);
748
749 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
750 uint8_t buffer[1024];
751 m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
752
753 CFX_RetainPtr<IFX_MemoryStream> file =
754 IFX_MemoryStream::Create(buffer, (size_t)dwSize, false);
755 m_syntaxParser.InitParser(file, 0);
756 m_syntaxParser.RestorePos(dwSize - 1);
757
758 if (m_syntaxParser.SearchWord("startxref", true, false, dwSize)) {
759 m_syntaxParser.GetNextWord(nullptr);
760
761 bool bNumber;
762 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
763 if (!bNumber) {
764 m_docStatus = PDF_DATAAVAIL_ERROR;
765 return false;
766 }
767
768 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
769 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
770 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
771 return true;
772 }
773
774 m_dwLastXRefOffset = m_dwXRefOffset;
775 SetStartOffset(m_dwXRefOffset);
776 m_docStatus = PDF_DATAAVAIL_CROSSREF;
777 return true;
778 }
779
780 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
781 return true;
782 }
783
784 pHints->AddSegment(req_pos, dwSize);
785 return false;
786 }
787
CheckCrossRefStream(DownloadHints * pHints,FX_FILESIZE & xref_offset)788 int32_t CPDF_DataAvail::CheckCrossRefStream(DownloadHints* pHints,
789 FX_FILESIZE& xref_offset) {
790 xref_offset = 0;
791 uint32_t req_size =
792 (uint32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
793
794 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
795 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
796 CFX_BinaryBuf buf(iSize);
797 uint8_t* pBuf = buf.GetBuffer();
798
799 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
800
801 CFX_RetainPtr<IFX_MemoryStream> file =
802 IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
803 m_parser.m_pSyntax->InitParser(file, 0);
804
805 bool bNumber;
806 CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);
807 if (!bNumber)
808 return -1;
809
810 uint32_t objNum = FXSYS_atoui(objnum.c_str());
811 std::unique_ptr<CPDF_Object> pObj =
812 m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);
813
814 if (!pObj) {
815 m_Pos += m_parser.m_pSyntax->SavePos();
816 return 0;
817 }
818
819 CPDF_Dictionary* pDict = pObj->GetDict();
820 CPDF_Name* pName = ToName(pDict ? pDict->GetObjectFor("Type") : nullptr);
821 if (pName && pName->GetString() == "XRef") {
822 m_Pos += m_parser.m_pSyntax->SavePos();
823 xref_offset = pObj->GetDict()->GetIntegerFor("Prev");
824 return 1;
825 }
826 return -1;
827 }
828 pHints->AddSegment(m_Pos, req_size);
829 return 0;
830 }
831
SetStartOffset(FX_FILESIZE dwOffset)832 void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
833 m_Pos = dwOffset;
834 }
835
GetNextToken(CFX_ByteString & token)836 bool CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
837 uint8_t ch;
838 if (!GetNextChar(ch))
839 return false;
840
841 while (1) {
842 while (PDFCharIsWhitespace(ch)) {
843 if (!GetNextChar(ch))
844 return false;
845 }
846
847 if (ch != '%')
848 break;
849
850 while (1) {
851 if (!GetNextChar(ch))
852 return false;
853 if (PDFCharIsLineEnding(ch))
854 break;
855 }
856 }
857
858 uint8_t buffer[256];
859 uint32_t index = 0;
860 if (PDFCharIsDelimiter(ch)) {
861 buffer[index++] = ch;
862 if (ch == '/') {
863 while (1) {
864 if (!GetNextChar(ch))
865 return false;
866
867 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
868 m_Pos--;
869 CFX_ByteString ret(buffer, index);
870 token = ret;
871 return true;
872 }
873
874 if (index < sizeof(buffer))
875 buffer[index++] = ch;
876 }
877 } else if (ch == '<') {
878 if (!GetNextChar(ch))
879 return false;
880
881 if (ch == '<')
882 buffer[index++] = ch;
883 else
884 m_Pos--;
885 } else if (ch == '>') {
886 if (!GetNextChar(ch))
887 return false;
888
889 if (ch == '>')
890 buffer[index++] = ch;
891 else
892 m_Pos--;
893 }
894
895 CFX_ByteString ret(buffer, index);
896 token = ret;
897 return true;
898 }
899
900 while (1) {
901 if (index < sizeof(buffer))
902 buffer[index++] = ch;
903
904 if (!GetNextChar(ch))
905 return false;
906
907 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
908 m_Pos--;
909 break;
910 }
911 }
912
913 token = CFX_ByteString(buffer, index);
914 return true;
915 }
916
GetNextChar(uint8_t & ch)917 bool CPDF_DataAvail::GetNextChar(uint8_t& ch) {
918 FX_FILESIZE pos = m_Pos;
919 if (pos >= m_dwFileLen)
920 return false;
921
922 if (m_bufferOffset >= pos ||
923 (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
924 FX_FILESIZE read_pos = pos;
925 uint32_t read_size = 512;
926 if ((FX_FILESIZE)read_size > m_dwFileLen)
927 read_size = (uint32_t)m_dwFileLen;
928
929 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen)
930 read_pos = m_dwFileLen - read_size;
931
932 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size))
933 return false;
934
935 m_bufferOffset = read_pos;
936 m_bufferSize = read_size;
937 }
938 ch = m_bufferData[pos - m_bufferOffset];
939 m_Pos++;
940 return true;
941 }
942
CheckCrossRefItem(DownloadHints * pHints)943 bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) {
944 int32_t iSize = 0;
945 CFX_ByteString token;
946 while (1) {
947 if (!GetNextToken(token)) {
948 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
949 pHints->AddSegment(m_Pos, iSize);
950 return false;
951 }
952
953 if (token == "trailer") {
954 m_dwTrailerOffset = m_Pos;
955 m_docStatus = PDF_DATAAVAIL_TRAILER;
956 return true;
957 }
958 }
959 }
960
CheckAllCrossRefStream(DownloadHints * pHints)961 bool CPDF_DataAvail::CheckAllCrossRefStream(DownloadHints* pHints) {
962 FX_FILESIZE xref_offset = 0;
963
964 int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
965 if (nRet == 1) {
966 if (!xref_offset) {
967 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
968 } else {
969 m_dwCurrentXRefSteam = xref_offset;
970 m_Pos = xref_offset;
971 }
972 return true;
973 }
974
975 if (nRet == -1)
976 m_docStatus = PDF_DATAAVAIL_ERROR;
977 return false;
978 }
979
CheckCrossRef(DownloadHints * pHints)980 bool CPDF_DataAvail::CheckCrossRef(DownloadHints* pHints) {
981 int32_t iSize = 0;
982 CFX_ByteString token;
983 if (!GetNextToken(token)) {
984 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
985 pHints->AddSegment(m_Pos, iSize);
986 return false;
987 }
988
989 if (token == "xref") {
990 while (1) {
991 if (!GetNextToken(token)) {
992 iSize =
993 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
994 pHints->AddSegment(m_Pos, iSize);
995 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
996 return false;
997 }
998
999 if (token == "trailer") {
1000 m_dwTrailerOffset = m_Pos;
1001 m_docStatus = PDF_DATAAVAIL_TRAILER;
1002 return true;
1003 }
1004 }
1005 } else {
1006 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1007 return true;
1008 }
1009 return false;
1010 }
1011
CheckTrailerAppend(DownloadHints * pHints)1012 bool CPDF_DataAvail::CheckTrailerAppend(DownloadHints* pHints) {
1013 if (m_Pos < m_dwFileLen) {
1014 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
1015 int32_t iSize = (int32_t)(
1016 dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
1017
1018 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
1019 pHints->AddSegment(dwAppendPos, iSize);
1020 return false;
1021 }
1022 }
1023
1024 if (m_dwPrevXRefOffset) {
1025 SetStartOffset(m_dwPrevXRefOffset);
1026 m_docStatus = PDF_DATAAVAIL_CROSSREF;
1027 } else {
1028 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
1029 }
1030 return true;
1031 }
1032
CheckTrailer(DownloadHints * pHints)1033 bool CPDF_DataAvail::CheckTrailer(DownloadHints* pHints) {
1034 int32_t iTrailerSize =
1035 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
1036 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
1037 int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
1038 CFX_BinaryBuf buf(iSize);
1039 uint8_t* pBuf = buf.GetBuffer();
1040 if (!pBuf) {
1041 m_docStatus = PDF_DATAAVAIL_ERROR;
1042 return false;
1043 }
1044
1045 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize))
1046 return false;
1047
1048 CFX_RetainPtr<IFX_MemoryStream> file =
1049 IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
1050 m_syntaxParser.InitParser(file, 0);
1051
1052 std::unique_ptr<CPDF_Object> pTrailer(
1053 m_syntaxParser.GetObject(nullptr, 0, 0, true));
1054 if (!pTrailer) {
1055 m_Pos += m_syntaxParser.SavePos();
1056 pHints->AddSegment(m_Pos, iTrailerSize);
1057 return false;
1058 }
1059
1060 if (!pTrailer->IsDictionary())
1061 return false;
1062
1063 CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
1064 CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt");
1065 if (ToReference(pEncrypt)) {
1066 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1067 return true;
1068 }
1069
1070 uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev");
1071 if (xrefpos) {
1072 m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
1073 if (m_dwPrevXRefOffset) {
1074 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1075 } else {
1076 m_dwPrevXRefOffset = xrefpos;
1077 if (m_dwPrevXRefOffset >= m_dwFileLen) {
1078 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
1079 } else {
1080 SetStartOffset(m_dwPrevXRefOffset);
1081 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
1082 }
1083 }
1084 return true;
1085 }
1086 m_dwPrevXRefOffset = 0;
1087 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
1088 return true;
1089 }
1090 pHints->AddSegment(m_Pos, iTrailerSize);
1091 return false;
1092 }
1093
CheckPage(uint32_t dwPage,DownloadHints * pHints)1094 bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) {
1095 while (true) {
1096 switch (m_docStatus) {
1097 case PDF_DATAAVAIL_PAGETREE:
1098 if (!LoadDocPages(pHints))
1099 return false;
1100 break;
1101 case PDF_DATAAVAIL_PAGE:
1102 if (!LoadDocPage(dwPage, pHints))
1103 return false;
1104 break;
1105 case PDF_DATAAVAIL_ERROR:
1106 return LoadAllFile(pHints);
1107 default:
1108 m_bPagesTreeLoad = true;
1109 m_bPagesLoad = true;
1110 m_bCurPageDictLoadOK = true;
1111 m_docStatus = PDF_DATAAVAIL_PAGE;
1112 return true;
1113 }
1114 }
1115 }
1116
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode,DownloadHints * pHints)1117 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
1118 PageNode* pPageNode,
1119 DownloadHints* pHints) {
1120 bool bExists = false;
1121 std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, pHints, &bExists);
1122 if (!bExists) {
1123 m_docStatus = PDF_DATAAVAIL_ERROR;
1124 return false;
1125 }
1126
1127 if (!pPages)
1128 return false;
1129
1130 CPDF_Array* pArray = pPages->AsArray();
1131 if (!pArray) {
1132 m_docStatus = PDF_DATAAVAIL_ERROR;
1133 return false;
1134 }
1135
1136 pPageNode->m_type = PDF_PAGENODE_PAGES;
1137 for (size_t i = 0; i < pArray->GetCount(); ++i) {
1138 CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
1139 if (!pKid)
1140 continue;
1141
1142 auto pNode = pdfium::MakeUnique<PageNode>();
1143 pNode->m_dwPageNo = pKid->GetRefObjNum();
1144 pPageNode->m_ChildNodes.push_back(std::move(pNode));
1145 }
1146 return true;
1147 }
1148
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode,DownloadHints * pHints)1149 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
1150 PageNode* pPageNode,
1151 DownloadHints* pHints) {
1152 bool bExists = false;
1153 std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, pHints, &bExists);
1154 if (!bExists) {
1155 m_docStatus = PDF_DATAAVAIL_ERROR;
1156 return false;
1157 }
1158
1159 if (!pPage)
1160 return false;
1161
1162 if (pPage->IsArray()) {
1163 pPageNode->m_dwPageNo = dwPageNo;
1164 pPageNode->m_type = PDF_PAGENODE_ARRAY;
1165 return true;
1166 }
1167
1168 if (!pPage->IsDictionary()) {
1169 m_docStatus = PDF_DATAAVAIL_ERROR;
1170 return false;
1171 }
1172
1173 pPageNode->m_dwPageNo = dwPageNo;
1174 CPDF_Dictionary* pDict = pPage->GetDict();
1175 CFX_ByteString type = pDict->GetStringFor("Type");
1176 if (type == "Pages") {
1177 pPageNode->m_type = PDF_PAGENODE_PAGES;
1178 CPDF_Object* pKids = pDict->GetObjectFor("Kids");
1179 if (!pKids) {
1180 m_docStatus = PDF_DATAAVAIL_PAGE;
1181 return true;
1182 }
1183
1184 switch (pKids->GetType()) {
1185 case CPDF_Object::REFERENCE: {
1186 CPDF_Reference* pKid = pKids->AsReference();
1187 auto pNode = pdfium::MakeUnique<PageNode>();
1188 pNode->m_dwPageNo = pKid->GetRefObjNum();
1189 pPageNode->m_ChildNodes.push_back(std::move(pNode));
1190 } break;
1191 case CPDF_Object::ARRAY: {
1192 CPDF_Array* pKidsArray = pKids->AsArray();
1193 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
1194 CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
1195 if (!pKid)
1196 continue;
1197
1198 auto pNode = pdfium::MakeUnique<PageNode>();
1199 pNode->m_dwPageNo = pKid->GetRefObjNum();
1200 pPageNode->m_ChildNodes.push_back(std::move(pNode));
1201 }
1202 } break;
1203 default:
1204 break;
1205 }
1206 } else if (type == "Page") {
1207 pPageNode->m_type = PDF_PAGENODE_PAGE;
1208 } else {
1209 m_docStatus = PDF_DATAAVAIL_ERROR;
1210 return false;
1211 }
1212 return true;
1213 }
1214
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,DownloadHints * pHints,int level)1215 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
1216 int32_t iPage,
1217 int32_t& iCount,
1218 DownloadHints* pHints,
1219 int level) {
1220 if (level >= kMaxPageRecursionDepth)
1221 return false;
1222
1223 int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
1224 if (iSize <= 0 || iPage >= iSize) {
1225 m_docStatus = PDF_DATAAVAIL_ERROR;
1226 return false;
1227 }
1228 for (int32_t i = 0; i < iSize; ++i) {
1229 PageNode* pNode = pageNode.m_ChildNodes[i].get();
1230 if (!pNode)
1231 continue;
1232
1233 if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
1234 // Updates the type for the unknown page node.
1235 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode, pHints))
1236 return false;
1237 }
1238 if (pNode->m_type == PDF_PAGENODE_ARRAY) {
1239 // Updates a more specific type for the array page node.
1240 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints))
1241 return false;
1242 }
1243 switch (pNode->m_type) {
1244 case PDF_PAGENODE_PAGE:
1245 iCount++;
1246 if (iPage == iCount && m_pDocument)
1247 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
1248 break;
1249 case PDF_PAGENODE_PAGES:
1250 if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1))
1251 return false;
1252 break;
1253 case PDF_PAGENODE_UNKNOWN:
1254 case PDF_PAGENODE_ARRAY:
1255 // Already converted above, error if we get here.
1256 return false;
1257 }
1258 if (iPage == iCount) {
1259 m_docStatus = PDF_DATAAVAIL_DONE;
1260 return true;
1261 }
1262 }
1263 return true;
1264 }
1265
LoadDocPage(uint32_t dwPage,DownloadHints * pHints)1266 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) {
1267 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1268 int32_t iPage = safePage.ValueOrDie();
1269 if (m_pDocument->GetPageCount() <= iPage ||
1270 m_pDocument->IsPageLoaded(iPage)) {
1271 m_docStatus = PDF_DATAAVAIL_DONE;
1272 return true;
1273 }
1274 if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
1275 m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
1276 return true;
1277 }
1278 int32_t iCount = -1;
1279 return CheckPageNode(m_PageNode, iPage, iCount, pHints, 0);
1280 }
1281
CheckPageCount(DownloadHints * pHints)1282 bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) {
1283 bool bExists = false;
1284 std::unique_ptr<CPDF_Object> pPages =
1285 GetObject(m_PagesObjNum, pHints, &bExists);
1286 if (!bExists) {
1287 m_docStatus = PDF_DATAAVAIL_ERROR;
1288 return false;
1289 }
1290 if (!pPages)
1291 return false;
1292
1293 CPDF_Dictionary* pPagesDict = pPages->GetDict();
1294 if (!pPagesDict) {
1295 m_docStatus = PDF_DATAAVAIL_ERROR;
1296 return false;
1297 }
1298 if (!pPagesDict->KeyExist("Kids"))
1299 return true;
1300
1301 return pPagesDict->GetIntegerFor("Count") > 0;
1302 }
1303
LoadDocPages(DownloadHints * pHints)1304 bool CPDF_DataAvail::LoadDocPages(DownloadHints* pHints) {
1305 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode, pHints))
1306 return false;
1307
1308 if (CheckPageCount(pHints)) {
1309 m_docStatus = PDF_DATAAVAIL_PAGE;
1310 return true;
1311 }
1312
1313 m_bTotalLoadPageTree = true;
1314 return false;
1315 }
1316
LoadPages(DownloadHints * pHints)1317 bool CPDF_DataAvail::LoadPages(DownloadHints* pHints) {
1318 while (!m_bPagesTreeLoad) {
1319 if (!CheckPageStatus(pHints))
1320 return false;
1321 }
1322
1323 if (m_bPagesLoad)
1324 return true;
1325
1326 m_pDocument->LoadPages();
1327 return false;
1328 }
1329
CheckLinearizedData(DownloadHints * pHints)1330 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
1331 DownloadHints* pHints) {
1332 if (m_bLinearedDataOK)
1333 return DataAvailable;
1334 ASSERT(m_pLinearized);
1335 if (!m_pLinearized->GetLastXRefOffset())
1336 return DataError;
1337
1338 if (!m_bMainXRefLoadTried) {
1339 FX_SAFE_UINT32 data_size = m_dwFileLen;
1340 data_size -= m_pLinearized->GetLastXRefOffset();
1341 if (!data_size.IsValid())
1342 return DataError;
1343
1344 if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
1345 data_size.ValueOrDie())) {
1346 pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
1347 data_size.ValueOrDie());
1348 return DataNotAvailable;
1349 }
1350
1351 CPDF_Parser::Error eRet =
1352 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
1353 m_bMainXRefLoadTried = true;
1354 if (eRet != CPDF_Parser::SUCCESS)
1355 return DataError;
1356
1357 if (!PreparePageItem())
1358 return DataNotAvailable;
1359
1360 m_bMainXRefLoadedOK = true;
1361 m_bLinearedDataOK = true;
1362 }
1363
1364 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
1365 }
1366
CheckPageAnnots(uint32_t dwPage,DownloadHints * pHints)1367 bool CPDF_DataAvail::CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints) {
1368 if (m_objs_array.empty()) {
1369 m_ObjectSet.clear();
1370
1371 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1372 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1373 if (!pPageDict)
1374 return true;
1375
1376 CPDF_Object* pAnnots = pPageDict->GetObjectFor("Annots");
1377 if (!pAnnots)
1378 return true;
1379
1380 std::vector<CPDF_Object*> obj_array;
1381 obj_array.push_back(pAnnots);
1382 if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
1383 return false;
1384
1385 m_objs_array.clear();
1386 return true;
1387 }
1388
1389 std::vector<CPDF_Object*> new_objs_array;
1390 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1391 m_objs_array = new_objs_array;
1392 return false;
1393 }
1394 m_objs_array.clear();
1395 return true;
1396 }
1397
CheckLinearizedFirstPage(uint32_t dwPage,DownloadHints * pHints)1398 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
1399 uint32_t dwPage,
1400 DownloadHints* pHints) {
1401 if (!m_bAnnotsLoad) {
1402 if (!CheckPageAnnots(dwPage, pHints))
1403 return DataNotAvailable;
1404 m_bAnnotsLoad = true;
1405 }
1406 const bool is_page_valid = ValidatePage(dwPage);
1407 (void)is_page_valid;
1408 ASSERT(is_page_valid);
1409 return DataAvailable;
1410 }
1411
HaveResourceAncestor(CPDF_Dictionary * pDict)1412 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
1413 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
1414 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
1415 return false;
1416
1417 CPDF_Object* pParent = pDict->GetObjectFor("Parent");
1418 if (!pParent)
1419 return false;
1420
1421 CPDF_Dictionary* pParentDict = pParent->GetDict();
1422 if (!pParentDict)
1423 return false;
1424
1425 CPDF_Object* pRet = pParentDict->GetObjectFor("Resources");
1426 if (pRet) {
1427 m_pPageResource = pRet;
1428 return true;
1429 }
1430
1431 return HaveResourceAncestor(pParentDict);
1432 }
1433
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)1434 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
1435 uint32_t dwPage,
1436 DownloadHints* pHints) {
1437 if (!m_pDocument)
1438 return DataError;
1439
1440 if (IsFirstCheck(dwPage)) {
1441 m_bCurPageDictLoadOK = false;
1442 m_bPageLoadedOK = false;
1443 m_bAnnotsLoad = false;
1444 m_bNeedDownLoadResource = false;
1445 m_objs_array.clear();
1446 m_ObjectSet.clear();
1447 }
1448
1449 if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
1450 return DataAvailable;
1451
1452 if (m_pLinearized) {
1453 if (dwPage == m_pLinearized->GetFirstPageNo()) {
1454 DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints);
1455 if (nRet == DataAvailable)
1456 m_pagesLoadState.insert(dwPage);
1457 return nRet;
1458 }
1459
1460 DocAvailStatus nResult = CheckLinearizedData(pHints);
1461 if (nResult != DataAvailable)
1462 return nResult;
1463
1464 if (m_pHintTables) {
1465 nResult = m_pHintTables->CheckPage(dwPage, pHints);
1466 if (nResult != DataAvailable)
1467 return nResult;
1468 m_pagesLoadState.insert(dwPage);
1469 return GetPage(dwPage) ? DataAvailable : DataError;
1470 }
1471
1472 if (m_bMainXRefLoadedOK) {
1473 if (m_bTotalLoadPageTree) {
1474 if (!LoadPages(pHints))
1475 return DataNotAvailable;
1476 } else {
1477 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage, pHints))
1478 return DataNotAvailable;
1479 }
1480 } else {
1481 if (!LoadAllFile(pHints))
1482 return DataNotAvailable;
1483 m_pDocument->GetParser()->RebuildCrossRef();
1484 ResetFirstCheck(dwPage);
1485 return DataAvailable;
1486 }
1487 } else {
1488 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK &&
1489 !CheckPage(dwPage, pHints)) {
1490 return DataNotAvailable;
1491 }
1492 }
1493
1494 if (m_bHaveAcroForm && !m_bAcroFormLoad) {
1495 if (!CheckAcroFormSubObject(pHints))
1496 return DataNotAvailable;
1497 m_bAcroFormLoad = true;
1498 }
1499
1500 if (!m_bPageLoadedOK) {
1501 if (m_objs_array.empty()) {
1502 m_ObjectSet.clear();
1503
1504 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1505 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1506 if (!m_pPageDict) {
1507 ResetFirstCheck(dwPage);
1508 // This is XFA page.
1509 return DataAvailable;
1510 }
1511
1512 std::vector<CPDF_Object*> obj_array;
1513 obj_array.push_back(m_pPageDict);
1514 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1515 return DataNotAvailable;
1516
1517 m_objs_array.clear();
1518 } else {
1519 std::vector<CPDF_Object*> new_objs_array;
1520 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1521 m_objs_array = new_objs_array;
1522 return DataNotAvailable;
1523 }
1524 }
1525 m_objs_array.clear();
1526 m_bPageLoadedOK = true;
1527 }
1528
1529 if (!m_bAnnotsLoad) {
1530 if (!CheckPageAnnots(dwPage, pHints))
1531 return DataNotAvailable;
1532 m_bAnnotsLoad = true;
1533 }
1534
1535 if (m_pPageDict && !m_bNeedDownLoadResource) {
1536 m_pPageResource = m_pPageDict->GetObjectFor("Resources");
1537 m_bNeedDownLoadResource =
1538 m_pPageResource || HaveResourceAncestor(m_pPageDict);
1539 }
1540
1541 if (m_bNeedDownLoadResource) {
1542 if (!CheckResources(pHints))
1543 return DataNotAvailable;
1544 m_bNeedDownLoadResource = false;
1545 }
1546
1547 m_bPageLoadedOK = false;
1548 m_bAnnotsLoad = false;
1549 m_bCurPageDictLoadOK = false;
1550
1551 ResetFirstCheck(dwPage);
1552 m_pagesLoadState.insert(dwPage);
1553 const bool is_page_valid = ValidatePage(dwPage);
1554 (void)is_page_valid;
1555 ASSERT(is_page_valid);
1556 return DataAvailable;
1557 }
1558
CheckResources(DownloadHints * pHints)1559 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
1560 if (m_objs_array.empty()) {
1561 std::vector<CPDF_Object*> obj_array;
1562 obj_array.push_back(m_pPageResource);
1563 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
1564 return false;
1565
1566 m_objs_array.clear();
1567 return true;
1568 }
1569 std::vector<CPDF_Object*> new_objs_array;
1570 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1571 m_objs_array = new_objs_array;
1572 return false;
1573 }
1574 m_objs_array.clear();
1575 return true;
1576 }
1577
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,uint32_t * pSize)1578 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
1579 uint32_t* pSize) {
1580 if (pPos)
1581 *pPos = m_dwLastXRefOffset;
1582 if (pSize)
1583 *pSize = (uint32_t)(m_dwFileLen - m_dwLastXRefOffset);
1584 }
1585
GetPageCount() const1586 int CPDF_DataAvail::GetPageCount() const {
1587 if (m_pLinearized)
1588 return m_pLinearized->GetPageCount();
1589 return m_pDocument ? m_pDocument->GetPageCount() : 0;
1590 }
1591
GetPage(int index)1592 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
1593 if (!m_pDocument || index < 0 || index >= GetPageCount())
1594 return nullptr;
1595 CPDF_Dictionary* page = m_pDocument->GetPage(index);
1596 if (page)
1597 return page;
1598 if (!m_pLinearized || !m_pHintTables)
1599 return nullptr;
1600
1601 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
1602 return nullptr;
1603 FX_FILESIZE szPageStartPos = 0;
1604 FX_FILESIZE szPageLength = 0;
1605 uint32_t dwObjNum = 0;
1606 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
1607 &szPageLength, &dwObjNum);
1608 if (!bPagePosGot || !dwObjNum)
1609 return nullptr;
1610 // We should say to the document, which object is the page.
1611 m_pDocument->SetPageObjNum(index, dwObjNum);
1612 // Page object already can be parsed in document.
1613 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
1614 m_syntaxParser.InitParser(
1615 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
1616 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
1617 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
1618 }
1619 const bool is_page_valid = ValidatePage(index);
1620 (void)is_page_valid;
1621 ASSERT(is_page_valid);
1622 return m_pDocument->GetPage(index);
1623 }
1624
IsFormAvail(DownloadHints * pHints)1625 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
1626 DownloadHints* pHints) {
1627 if (!m_pDocument)
1628 return FormAvailable;
1629 if (m_pLinearized) {
1630 DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
1631 if (nDocStatus == DataError)
1632 return FormError;
1633 if (nDocStatus == DataNotAvailable)
1634 return FormNotAvailable;
1635 }
1636
1637 if (!m_bLinearizedFormParamLoad) {
1638 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1639 if (!pRoot)
1640 return FormAvailable;
1641
1642 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1643 if (!pAcroForm)
1644 return FormNotExist;
1645
1646 m_objs_array.push_back(pAcroForm->GetDict());
1647 m_bLinearizedFormParamLoad = true;
1648 }
1649
1650 std::vector<CPDF_Object*> new_objs_array;
1651 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
1652 m_objs_array = new_objs_array;
1653 return FormNotAvailable;
1654 }
1655
1656 m_objs_array.clear();
1657 const bool is_form_valid = ValidateForm();
1658 (void)is_form_valid;
1659 ASSERT(is_form_valid);
1660 return FormAvailable;
1661 }
1662
ValidatePage(uint32_t dwPage)1663 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1664 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1665 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1666 if (!pPageDict)
1667 return false;
1668 std::vector<CPDF_Object*> obj_array;
1669 obj_array.push_back(pPageDict);
1670 std::vector<CPDF_Object*> dummy;
1671 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1672 }
1673
ValidateForm()1674 bool CPDF_DataAvail::ValidateForm() {
1675 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
1676 if (!pRoot)
1677 return true;
1678 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
1679 if (!pAcroForm)
1680 return false;
1681 std::vector<CPDF_Object*> obj_array;
1682 obj_array.push_back(pAcroForm);
1683 std::vector<CPDF_Object*> dummy;
1684 return AreObjectsAvailable(obj_array, true, nullptr, dummy);
1685 }
1686
PageNode()1687 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1688
~PageNode()1689 CPDF_DataAvail::PageNode::~PageNode() {}
1690