1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../../include/fpdfapi/fpdf_parser.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
9 #include "../../../include/fpdfapi/fpdf_page.h"
10 #include "../fpdf_page/pageint.h"
11 #include <limits.h>
12 #define _PARSER_OBJECT_LEVLE_ 64
13 extern const FX_LPCSTR _PDF_CharType;
IsSignatureDict(const CPDF_Dictionary * pDict)14 FX_BOOL IsSignatureDict(const CPDF_Dictionary* pDict)
15 {
16 CPDF_Object* pType = pDict->GetElementValue(FX_BSTRC("Type"));
17 if (!pType) {
18 pType = pDict->GetElementValue(FX_BSTRC("FT"));
19 if (!pType) {
20 return FALSE;
21 }
22 }
23 if (pType->GetString() == FX_BSTRC("Sig")) {
24 return TRUE;
25 }
26 return FALSE;
27 }
_CompareDWord(const void * p1,const void * p2)28 static FX_INT32 _CompareDWord(const void* p1, const void* p2)
29 {
30 return (*(FX_DWORD*)p1) - (*(FX_DWORD*)p2);
31 }
_CompareFileSize(const void * p1,const void * p2)32 static int _CompareFileSize(const void* p1, const void* p2)
33 {
34 FX_FILESIZE ret = (*(FX_FILESIZE*)p1) - (*(FX_FILESIZE*)p2);
35 if (ret > 0) {
36 return 1;
37 }
38 if (ret < 0) {
39 return -1;
40 }
41 return 0;
42 }
CPDF_Parser()43 CPDF_Parser::CPDF_Parser()
44 {
45 m_pDocument = NULL;
46 m_pTrailer = NULL;
47 m_pEncryptDict = NULL;
48 m_pSecurityHandler = NULL;
49 m_pLinearized = NULL;
50 m_dwFirstPageNo = 0;
51 m_dwXrefStartObjNum = 0;
52 m_bOwnFileRead = TRUE;
53 m_bForceUseSecurityHandler = FALSE;
54 }
~CPDF_Parser()55 CPDF_Parser::~CPDF_Parser()
56 {
57 CloseParser(FALSE);
58 }
GetLastObjNum()59 FX_DWORD CPDF_Parser::GetLastObjNum()
60 {
61 FX_DWORD dwSize = m_CrossRef.GetSize();
62 return dwSize ? dwSize - 1 : 0;
63 }
SetEncryptDictionary(CPDF_Dictionary * pDict)64 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict)
65 {
66 m_pEncryptDict = pDict;
67 }
CloseParser(FX_BOOL bReParse)68 void CPDF_Parser::CloseParser(FX_BOOL bReParse)
69 {
70 m_bVersionUpdated = FALSE;
71 if (m_pDocument && !bReParse) {
72 delete m_pDocument;
73 m_pDocument = NULL;
74 }
75 if (m_pTrailer) {
76 m_pTrailer->Release();
77 m_pTrailer = NULL;
78 }
79 ReleaseEncryptHandler();
80 SetEncryptDictionary(NULL);
81 if (m_bOwnFileRead && m_Syntax.m_pFileAccess != NULL) {
82 m_Syntax.m_pFileAccess->Release();
83 m_Syntax.m_pFileAccess = NULL;
84 }
85 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
86 while (pos) {
87 FX_LPVOID objnum;
88 CPDF_StreamAcc* pStream;
89 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
90 delete pStream;
91 }
92 m_ObjectStreamMap.RemoveAll();
93 m_SortedOffset.RemoveAll();
94 m_CrossRef.RemoveAll();
95 m_V5Type.RemoveAll();
96 m_ObjVersion.RemoveAll();
97 FX_INT32 iLen = m_Trailers.GetSize();
98 for (FX_INT32 i = 0; i < iLen; ++i) {
99 m_Trailers.GetAt(i)->Release();
100 }
101 m_Trailers.RemoveAll();
102 if (m_pLinearized) {
103 m_pLinearized->Release();
104 m_pLinearized = NULL;
105 }
106 }
GetHeaderOffset(IFX_FileRead * pFile)107 static FX_INT32 GetHeaderOffset(IFX_FileRead* pFile)
108 {
109 FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
110 FX_BYTE buf[4];
111 FX_INT32 offset = 0;
112 while (1) {
113 if (!pFile->ReadBlock(buf, offset, 4)) {
114 return -1;
115 }
116 if (*(FX_DWORD*)buf == tag) {
117 return offset;
118 }
119 offset ++;
120 if (offset > 1024) {
121 return -1;
122 }
123 }
124 return -1;
125 }
StartParse(FX_LPCSTR filename,FX_BOOL bReParse)126 FX_DWORD CPDF_Parser::StartParse(FX_LPCSTR filename, FX_BOOL bReParse)
127 {
128 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
129 if (!pFileAccess) {
130 return PDFPARSE_ERROR_FILE;
131 }
132 return StartParse(pFileAccess, bReParse);
133 }
StartParse(FX_LPCWSTR filename,FX_BOOL bReParse)134 FX_DWORD CPDF_Parser::StartParse(FX_LPCWSTR filename, FX_BOOL bReParse)
135 {
136 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
137 if (!pFileAccess) {
138 return PDFPARSE_ERROR_FILE;
139 }
140 return StartParse(pFileAccess, bReParse);
141 }
142 CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
143 CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
StartParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)144 FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
145 {
146 CloseParser(bReParse);
147 m_bXRefStream = FALSE;
148 m_LastXRefOffset = 0;
149 m_bOwnFileRead = bOwnFileRead;
150 FX_INT32 offset = GetHeaderOffset(pFileAccess);
151 if (offset == -1) {
152 if (bOwnFileRead && pFileAccess) {
153 pFileAccess->Release();
154 }
155 return PDFPARSE_ERROR_FORMAT;
156 }
157 m_Syntax.InitParser(pFileAccess, offset);
158 FX_BYTE ch;
159 m_Syntax.GetCharAt(5, ch);
160 m_FileVersion = (ch - '0') * 10;
161 m_Syntax.GetCharAt(7, ch);
162 m_FileVersion += ch - '0';
163 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
164 if (!bReParse) {
165 m_pDocument = FX_NEW CPDF_Document(this);
166 }
167 FX_BOOL bXRefRebuilt = FALSE;
168 if (m_Syntax.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, 4096)) {
169 FX_FILESIZE startxref_offset = m_Syntax.SavePos();
170 FX_LPVOID pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
171 if (pResult == NULL) {
172 m_SortedOffset.Add(startxref_offset);
173 }
174 m_Syntax.GetKeyword();
175 FX_BOOL bNumber;
176 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(bNumber);
177 if (!bNumber) {
178 return PDFPARSE_ERROR_FORMAT;
179 }
180 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
181 if (!LoadAllCrossRefV4(m_LastXRefOffset) && !LoadAllCrossRefV5(m_LastXRefOffset)) {
182 if (!RebuildCrossRef()) {
183 return PDFPARSE_ERROR_FORMAT;
184 }
185 bXRefRebuilt = TRUE;
186 m_LastXRefOffset = 0;
187 }
188 } else {
189 if (!RebuildCrossRef()) {
190 return PDFPARSE_ERROR_FORMAT;
191 }
192 bXRefRebuilt = TRUE;
193 }
194 FX_DWORD dwRet = SetEncryptHandler();
195 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
196 return dwRet;
197 }
198 m_pDocument->LoadDoc();
199 if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
200 if (bXRefRebuilt) {
201 return PDFPARSE_ERROR_FORMAT;
202 }
203 ReleaseEncryptHandler();
204 if (!RebuildCrossRef()) {
205 return PDFPARSE_ERROR_FORMAT;
206 }
207 dwRet = SetEncryptHandler();
208 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
209 return dwRet;
210 }
211 m_pDocument->LoadDoc();
212 if (m_pDocument->GetRoot() == NULL) {
213 return PDFPARSE_ERROR_FORMAT;
214 }
215 }
216 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
217 FX_DWORD RootObjNum = GetRootObjNum();
218 if (RootObjNum == 0) {
219 ReleaseEncryptHandler();
220 RebuildCrossRef();
221 RootObjNum = GetRootObjNum();
222 if (RootObjNum == 0) {
223 return PDFPARSE_ERROR_FORMAT;
224 }
225 dwRet = SetEncryptHandler();
226 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
227 return dwRet;
228 }
229 }
230 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
231 CPDF_Reference* pMetadata = (CPDF_Reference*)m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
232 if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
233 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
234 }
235 }
236 return PDFPARSE_ERROR_SUCCESS;
237 }
SetEncryptHandler()238 FX_DWORD CPDF_Parser::SetEncryptHandler()
239 {
240 ReleaseEncryptHandler();
241 SetEncryptDictionary(NULL);
242 if (m_pTrailer == NULL) {
243 return PDFPARSE_ERROR_FORMAT;
244 }
245 CPDF_Object* pEncryptObj = m_pTrailer->GetElement(FX_BSTRC("Encrypt"));
246 if (pEncryptObj) {
247 if (pEncryptObj->GetType() == PDFOBJ_DICTIONARY) {
248 SetEncryptDictionary((CPDF_Dictionary*)pEncryptObj);
249 } else if (pEncryptObj->GetType() == PDFOBJ_REFERENCE) {
250 pEncryptObj = m_pDocument->GetIndirectObject(((CPDF_Reference*)pEncryptObj)->GetRefObjNum());
251 if (pEncryptObj) {
252 SetEncryptDictionary(pEncryptObj->GetDict());
253 }
254 }
255 }
256 if (m_bForceUseSecurityHandler) {
257 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
258 if (m_pSecurityHandler == NULL) {
259 return PDFPARSE_ERROR_HANDLER;
260 }
261 if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
262 return err;
263 }
264 CPDF_CryptoHandler* pCryptoHandler = m_pSecurityHandler->CreateCryptoHandler();
265 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
266 delete pCryptoHandler;
267 pCryptoHandler = NULL;
268 return PDFPARSE_ERROR_HANDLER;
269 }
270 m_Syntax.SetEncrypt(pCryptoHandler);
271 } else if (m_pEncryptDict) {
272 CFX_ByteString filter = m_pEncryptDict->GetString(FX_BSTRC("Filter"));
273 CPDF_SecurityHandler* pSecurityHandler = NULL;
274 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
275 if (filter == FX_BSTRC("Standard")) {
276 pSecurityHandler = FPDF_CreateStandardSecurityHandler();
277 err = PDFPARSE_ERROR_PASSWORD;
278 }
279 if (pSecurityHandler == NULL) {
280 return PDFPARSE_ERROR_HANDLER;
281 }
282 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
283 delete pSecurityHandler;
284 pSecurityHandler = NULL;
285 return err;
286 }
287 m_pSecurityHandler = pSecurityHandler;
288 CPDF_CryptoHandler* pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
289 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
290 delete pCryptoHandler;
291 pCryptoHandler = NULL;
292 return PDFPARSE_ERROR_HANDLER;
293 }
294 m_Syntax.SetEncrypt(pCryptoHandler);
295 }
296 return PDFPARSE_ERROR_SUCCESS;
297 }
ReleaseEncryptHandler()298 void CPDF_Parser::ReleaseEncryptHandler()
299 {
300 if (m_Syntax.m_pCryptoHandler) {
301 delete m_Syntax.m_pCryptoHandler;
302 m_Syntax.m_pCryptoHandler = NULL;
303 }
304 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
305 delete m_pSecurityHandler;
306 m_pSecurityHandler = NULL;
307 }
308 }
GetObjectOffset(FX_DWORD objnum)309 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum)
310 {
311 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
312 return 0;
313 }
314 if (m_V5Type[objnum] == 1) {
315 return m_CrossRef[objnum];
316 }
317 if (m_V5Type[objnum] == 2) {
318 return m_CrossRef[(FX_INT32)m_CrossRef[objnum]];
319 }
320 return 0;
321 }
GetDirectInteger(CPDF_Dictionary * pDict,FX_BSTR key)322 static FX_INT32 GetDirectInteger(CPDF_Dictionary* pDict, FX_BSTR key)
323 {
324 CPDF_Object* pObj = pDict->GetElement(key);
325 if (pObj == NULL) {
326 return 0;
327 }
328 if (pObj->GetType() == PDFOBJ_NUMBER) {
329 return ((CPDF_Number*)pObj)->GetInteger();
330 }
331 return 0;
332 }
CheckDirectType(CPDF_Dictionary * pDict,FX_BSTR key,FX_INT32 iType)333 static FX_BOOL CheckDirectType(CPDF_Dictionary* pDict, FX_BSTR key, FX_INT32 iType)
334 {
335 CPDF_Object* pObj = pDict->GetElement(key);
336 if (!pObj) {
337 return TRUE;
338 }
339 return pObj->GetType() == iType;
340 }
LoadAllCrossRefV4(FX_FILESIZE xrefpos)341 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos)
342 {
343 if (!LoadCrossRefV4(xrefpos, 0, TRUE, FALSE)) {
344 return FALSE;
345 }
346 m_pTrailer = LoadTrailerV4();
347 if (m_pTrailer == NULL) {
348 return FALSE;
349 }
350 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
351 if (xrefsize <= 0 || xrefsize > (1 << 20)) {
352 return FALSE;
353 }
354 m_CrossRef.SetSize(xrefsize);
355 m_V5Type.SetSize(xrefsize);
356 CFX_FileSizeArray CrossRefList, XRefStreamList;
357 CrossRefList.Add(xrefpos);
358 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
359 if (!CheckDirectType(m_pTrailer, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
360 return FALSE;
361 }
362 FX_FILESIZE newxrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
363 if (newxrefpos == xrefpos) {
364 return FALSE;
365 }
366 xrefpos = newxrefpos;
367 while (xrefpos) {
368 CrossRefList.InsertAt(0, xrefpos);
369 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
370 CPDF_Dictionary* pDict = LoadTrailerV4();
371 if (pDict == NULL) {
372 return FALSE;
373 }
374 if (!CheckDirectType(pDict, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
375 pDict->Release();
376 return FALSE;
377 }
378 newxrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
379 if (newxrefpos == xrefpos) {
380 pDict->Release();
381 return FALSE;
382 }
383 xrefpos = newxrefpos;
384 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
385 m_Trailers.Add(pDict);
386 }
387 for (FX_INT32 i = 0; i < CrossRefList.GetSize(); i ++)
388 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
389 return FALSE;
390 }
391 return TRUE;
392 }
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,FX_DWORD dwObjCount)393 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, FX_DWORD dwObjCount)
394 {
395 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
396 return FALSE;
397 }
398 m_pTrailer = LoadTrailerV4();
399 if (m_pTrailer == NULL) {
400 return FALSE;
401 }
402 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
403 if (xrefsize == 0) {
404 return FALSE;
405 }
406 CFX_FileSizeArray CrossRefList, XRefStreamList;
407 CrossRefList.Add(xrefpos);
408 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
409 xrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
410 while (xrefpos) {
411 CrossRefList.InsertAt(0, xrefpos);
412 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
413 CPDF_Dictionary* pDict = LoadTrailerV4();
414 if (pDict == NULL) {
415 return FALSE;
416 }
417 xrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
418 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
419 m_Trailers.Add(pDict);
420 }
421 for (FX_INT32 i = 1; i < CrossRefList.GetSize(); i ++)
422 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
423 return FALSE;
424 }
425 return TRUE;
426 }
LoadLinearizedCrossRefV4(FX_FILESIZE pos,FX_DWORD dwObjCount)427 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount)
428 {
429 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
430 m_Syntax.RestorePos(dwStartPos);
431 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
432 if (pResult == NULL) {
433 m_SortedOffset.Add(pos);
434 }
435 FX_DWORD start_objnum = 0;
436 FX_DWORD count = dwObjCount;
437 FX_FILESIZE SavedPos = m_Syntax.SavePos();
438 FX_INT32 recordsize = 20;
439 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
440 pBuf[1024 * recordsize] = '\0';
441 FX_INT32 nBlocks = count / 1024 + 1;
442 for (FX_INT32 block = 0; block < nBlocks; block ++) {
443 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
444 FX_DWORD dwReadSize = block_size * recordsize;
445 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
446 FX_Free(pBuf);
447 return FALSE;
448 }
449 if (!m_Syntax.ReadBlock((FX_LPBYTE)pBuf, dwReadSize)) {
450 FX_Free(pBuf);
451 return FALSE;
452 }
453 for (FX_INT32 i = 0; i < block_size; i ++) {
454 FX_DWORD objnum = start_objnum + block * 1024 + i;
455 char* pEntry = pBuf + i * recordsize;
456 if (pEntry[17] == 'f') {
457 m_CrossRef.SetAtGrow(objnum, 0);
458 m_V5Type.SetAtGrow(objnum, 0);
459 } else {
460 FX_INT32 offset = FXSYS_atoi(pEntry);
461 if (offset == 0) {
462 for (FX_INT32 c = 0; c < 10; c ++) {
463 if (pEntry[c] < '0' || pEntry[c] > '9') {
464 FX_Free(pBuf);
465 return FALSE;
466 }
467 }
468 }
469 m_CrossRef.SetAtGrow(objnum, offset);
470 FX_INT32 version = FXSYS_atoi(pEntry + 11);
471 if (version >= 1) {
472 m_bVersionUpdated = TRUE;
473 }
474 m_ObjVersion.SetAtGrow(objnum, version);
475 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
476 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
477 if (pResult == NULL) {
478 m_SortedOffset.Add(m_CrossRef[objnum]);
479 }
480 }
481 m_V5Type.SetAtGrow(objnum, 1);
482 }
483 }
484 }
485 FX_Free(pBuf);
486 m_Syntax.RestorePos(SavedPos + count * recordsize);
487 return TRUE;
488 }
LoadCrossRefV4(FX_FILESIZE pos,FX_FILESIZE streampos,FX_BOOL bSkip,FX_BOOL bFirst)489 FX_BOOL CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip, FX_BOOL bFirst)
490 {
491 m_Syntax.RestorePos(pos);
492 if (m_Syntax.GetKeyword() != FX_BSTRC("xref")) {
493 return FALSE;
494 }
495 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
496 if (pResult == NULL) {
497 m_SortedOffset.Add(pos);
498 }
499 if (streampos) {
500 FX_LPVOID pResult = FXSYS_bsearch(&streampos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
501 if (pResult == NULL) {
502 m_SortedOffset.Add(streampos);
503 }
504 }
505 while (1) {
506 FX_FILESIZE SavedPos = m_Syntax.SavePos();
507 FX_BOOL bIsNumber;
508 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
509 if (word.IsEmpty()) {
510 return FALSE;
511 }
512 if (!bIsNumber) {
513 m_Syntax.RestorePos(SavedPos);
514 break;
515 }
516 FX_DWORD start_objnum = FXSYS_atoi(word);
517 if (start_objnum >= (1 << 20)) {
518 return FALSE;
519 }
520 FX_DWORD count = m_Syntax.GetDirectNum();
521 m_Syntax.ToNextWord();
522 SavedPos = m_Syntax.SavePos();
523 FX_BOOL bFirstItem = FALSE;
524 FX_INT32 recordsize = 20;
525 if (bFirst) {
526 bFirstItem = TRUE;
527 }
528 m_dwXrefStartObjNum = start_objnum;
529 if (!bSkip) {
530 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
531 pBuf[1024 * recordsize] = '\0';
532 FX_INT32 nBlocks = count / 1024 + 1;
533 FX_BOOL bFirstBlock = TRUE;
534 for (FX_INT32 block = 0; block < nBlocks; block ++) {
535 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
536 m_Syntax.ReadBlock((FX_LPBYTE)pBuf, block_size * recordsize);
537 for (FX_INT32 i = 0; i < block_size; i ++) {
538 FX_DWORD objnum = start_objnum + block * 1024 + i;
539 char* pEntry = pBuf + i * recordsize;
540 if (pEntry[17] == 'f') {
541 if (bFirstItem) {
542 objnum = 0;
543 bFirstItem = FALSE;
544 }
545 if (bFirstBlock) {
546 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
547 FX_INT32 version = FXSYS_atoi(pEntry + 11);
548 if (offset == 0 && version == 65535 && start_objnum != 0) {
549 start_objnum--;
550 objnum = 0;
551 }
552 }
553 m_CrossRef.SetAtGrow(objnum, 0);
554 m_V5Type.SetAtGrow(objnum, 0);
555 } else {
556 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
557 if (offset == 0) {
558 for (FX_INT32 c = 0; c < 10; c ++) {
559 if (pEntry[c] < '0' || pEntry[c] > '9') {
560 FX_Free(pBuf);
561 return FALSE;
562 }
563 }
564 }
565 m_CrossRef.SetAtGrow(objnum, offset);
566 FX_INT32 version = FXSYS_atoi(pEntry + 11);
567 if (version >= 1) {
568 m_bVersionUpdated = TRUE;
569 }
570 m_ObjVersion.SetAtGrow(objnum, version);
571 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
572 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
573 if (pResult == NULL) {
574 m_SortedOffset.Add(m_CrossRef[objnum]);
575 }
576 }
577 m_V5Type.SetAtGrow(objnum, 1);
578 }
579 if (bFirstBlock) {
580 bFirstBlock = FALSE;
581 }
582 }
583 }
584 FX_Free(pBuf);
585 }
586 m_Syntax.RestorePos(SavedPos + count * recordsize);
587 }
588 if (streampos)
589 if (!LoadCrossRefV5(streampos, streampos, FALSE)) {
590 return FALSE;
591 }
592 return TRUE;
593 }
LoadAllCrossRefV5(FX_FILESIZE xrefpos)594 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos)
595 {
596 if (!LoadCrossRefV5(xrefpos, xrefpos, TRUE)) {
597 return FALSE;
598 }
599 while (xrefpos)
600 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
601 return FALSE;
602 }
603 m_ObjectStreamMap.InitHashTable(101, FALSE);
604 m_bXRefStream = TRUE;
605 return TRUE;
606 }
RebuildCrossRef()607 FX_BOOL CPDF_Parser::RebuildCrossRef()
608 {
609 m_CrossRef.RemoveAll();
610 m_V5Type.RemoveAll();
611 m_SortedOffset.RemoveAll();
612 m_ObjVersion.RemoveAll();
613 if (m_pTrailer) {
614 m_pTrailer->Release();
615 m_pTrailer = NULL;
616 }
617 FX_INT32 status = 0;
618 FX_INT32 inside_index = 0;
619 FX_DWORD objnum, gennum;
620 FX_INT32 depth = 0;
621 FX_LPBYTE buffer = FX_Alloc(FX_BYTE, 4096);
622 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
623 FX_FILESIZE start_pos, start_pos1;
624 FX_FILESIZE last_obj = -1, last_xref = -1, last_trailer = -1;
625 FX_BOOL bInUpdate = FALSE;
626 while (pos < m_Syntax.m_FileLen) {
627 FX_BOOL bOverFlow = FALSE;
628 FX_DWORD size = (FX_DWORD)(m_Syntax.m_FileLen - pos);
629 if (size > 4096) {
630 size = 4096;
631 }
632 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer, pos, size)) {
633 break;
634 }
635 for (FX_DWORD i = 0; i < size; i ++) {
636 FX_BYTE byte = buffer[i];
637 FX_LPBYTE pData = buffer + i;
638 switch (status) {
639 case 0:
640 if (_PDF_CharType[byte] == 'W') {
641 status = 1;
642 }
643 if (byte <= '9' && byte >= '0') {
644 --i;
645 status = 1;
646 }
647 if (byte == '%') {
648 inside_index = 0;
649 status = 9;
650 }
651 if (byte == '(') {
652 status = 10;
653 depth = 1;
654 }
655 if (byte == '<') {
656 inside_index = 1;
657 status = 11;
658 }
659 if (byte == '\\') {
660 status = 13;
661 }
662 if (byte == 't') {
663 status = 7;
664 inside_index = 1;
665 }
666 break;
667 case 1:
668 if (_PDF_CharType[byte] == 'W') {
669 break;
670 } else if (byte <= '9' && byte >= '0') {
671 start_pos = pos + i;
672 status = 2;
673 objnum = byte - '0';
674 } else if (byte == 't') {
675 status = 7;
676 inside_index = 1;
677 } else if (byte == 'x') {
678 status = 8;
679 inside_index = 1;
680 } else {
681 --i;
682 status = 0;
683 }
684 break;
685 case 2:
686 if (byte <= '9' && byte >= '0') {
687 objnum = objnum * 10 + byte - '0';
688 break;
689 } else if (_PDF_CharType[byte] == 'W') {
690 status = 3;
691 } else {
692 --i;
693 status = 14;
694 inside_index = 0;
695 }
696 break;
697 case 3:
698 if (byte <= '9' && byte >= '0') {
699 start_pos1 = pos + i;
700 status = 4;
701 gennum = byte - '0';
702 } else if (_PDF_CharType[byte] == 'W') {
703 break;
704 } else if (byte == 't') {
705 status = 7;
706 inside_index = 1;
707 } else {
708 --i;
709 status = 0;
710 }
711 break;
712 case 4:
713 if (byte <= '9' && byte >= '0') {
714 gennum = gennum * 10 + byte - '0';
715 break;
716 } else if (_PDF_CharType[byte] == 'W') {
717 status = 5;
718 } else {
719 --i;
720 status = 0;
721 }
722 break;
723 case 5:
724 if (byte == 'o') {
725 status = 6;
726 inside_index = 1;
727 } else if (_PDF_CharType[byte] == 'W') {
728 break;
729 } else if (byte <= '9' && byte >= '0') {
730 objnum = gennum;
731 gennum = byte - '0';
732 start_pos = start_pos1;
733 start_pos1 = pos + i;
734 status = 4;
735 } else if (byte == 't') {
736 status = 7;
737 inside_index = 1;
738 } else {
739 --i;
740 status = 0;
741 }
742 break;
743 case 6:
744 switch (inside_index) {
745 case 1:
746 if (byte != 'b') {
747 --i;
748 status = 0;
749 } else {
750 inside_index ++;
751 }
752 break;
753 case 2:
754 if (byte != 'j') {
755 --i;
756 status = 0;
757 } else {
758 inside_index ++;
759 }
760 break;
761 case 3:
762 if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
763 if (objnum > 0x1000000) {
764 status = 0;
765 break;
766 }
767 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
768 last_obj = start_pos;
769 FX_LPVOID pResult = FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
770 if (pResult == NULL) {
771 m_SortedOffset.Add(obj_pos);
772 }
773 FX_FILESIZE obj_end = 0;
774 CPDF_Object *pObject = ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum, NULL, &obj_end);
775 if (pObject) {
776 int iType = pObject->GetType();
777 if (iType == PDFOBJ_STREAM) {
778 CPDF_Stream* pStream = (CPDF_Stream*)pObject;
779 CPDF_Dictionary* pDict = pStream->GetDict();
780 if (pDict) {
781 if (pDict->KeyExist(FX_BSTRC("Type"))) {
782 CFX_ByteString bsValue = pDict->GetString(FX_BSTRC("Type"));
783 if (bsValue == FX_BSTRC("XRef") && pDict->KeyExist(FX_BSTRC("Size"))) {
784 CPDF_Object* pRoot = pDict->GetElement(FX_BSTRC("Root"));
785 if (pRoot && pRoot->GetDict() && pRoot->GetDict()->GetElement(FX_BSTRC("Pages"))) {
786 if (m_pTrailer) {
787 m_pTrailer->Release();
788 }
789 m_pTrailer = (CPDF_Dictionary*)pDict->Clone();
790 }
791 }
792 }
793 }
794 }
795 }
796 FX_FILESIZE offset = 0;
797 m_Syntax.RestorePos(obj_pos);
798 offset = m_Syntax.FindTag(FX_BSTRC("obj"), 0);
799 if (offset == -1) {
800 offset = 0;
801 } else {
802 offset += 3;
803 }
804 FX_FILESIZE nLen = obj_end - obj_pos - offset;
805 if ((FX_DWORD)nLen > size - i) {
806 pos = obj_end + m_Syntax.m_HeaderOffset;
807 bOverFlow = TRUE;
808 } else {
809 i += (FX_DWORD)nLen;
810 }
811 if (m_CrossRef.GetSize() > (FX_INT32)objnum && m_CrossRef[objnum]) {
812 if (pObject) {
813 FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
814 m_CrossRef[objnum] = obj_pos;
815 m_ObjVersion.SetAt(objnum, (FX_SHORT)gennum);
816 if (oldgen != gennum) {
817 m_bVersionUpdated = TRUE;
818 }
819 }
820 } else {
821 m_CrossRef.SetAtGrow(objnum, obj_pos);
822 m_V5Type.SetAtGrow(objnum, 1);
823 m_ObjVersion.SetAtGrow(objnum, (FX_SHORT)gennum);
824 }
825 if (pObject) {
826 pObject->Release();
827 }
828 }
829 --i;
830 status = 0;
831 break;
832 }
833 break;
834 case 7:
835 if (inside_index == 7) {
836 if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') {
837 last_trailer = pos + i - 7;
838 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
839 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
840 if (pObj) {
841 if (pObj->GetType() != PDFOBJ_DICTIONARY && pObj->GetType() != PDFOBJ_STREAM) {
842 pObj->Release();
843 } else {
844 CPDF_Dictionary* pTrailer = NULL;
845 if (pObj->GetType() == PDFOBJ_STREAM) {
846 pTrailer = ((CPDF_Stream*)pObj)->GetDict();
847 } else {
848 pTrailer = (CPDF_Dictionary*)pObj;
849 }
850 if (pTrailer) {
851 if (m_pTrailer) {
852 CPDF_Object* pRoot = pTrailer->GetElement(FX_BSTRC("Root"));
853 if (pRoot == NULL || (pRoot->GetType() == PDFOBJ_REFERENCE &&
854 (FX_DWORD)m_CrossRef.GetSize() > ((CPDF_Reference*)pRoot)->GetRefObjNum() &&
855 m_CrossRef.GetAt(((CPDF_Reference*)pRoot)->GetRefObjNum()) != 0)) {
856 FX_POSITION pos = pTrailer->GetStartPos();
857 while (pos) {
858 CFX_ByteString key;
859 CPDF_Object* pObj = pTrailer->GetNextElement(pos, key);
860 m_pTrailer->SetAt(key, pObj->Clone(), m_pDocument);
861 }
862 pObj->Release();
863 } else {
864 pObj->Release();
865 }
866 } else {
867 if (pObj->GetType() == PDFOBJ_STREAM) {
868 m_pTrailer = (CPDF_Dictionary*)pTrailer->Clone();
869 pObj->Release();
870 } else {
871 m_pTrailer = pTrailer;
872 }
873 FX_FILESIZE dwSavePos = m_Syntax.SavePos();
874 CFX_ByteString strWord = m_Syntax.GetKeyword();
875 if (!strWord.Compare(FX_BSTRC("startxref"))) {
876 FX_BOOL bNumber = FALSE;
877 CFX_ByteString bsOffset = m_Syntax.GetNextWord(bNumber);
878 if (bNumber) {
879 m_LastXRefOffset = FXSYS_atoi(bsOffset);
880 }
881 }
882 m_Syntax.RestorePos(dwSavePos);
883 }
884 } else {
885 pObj->Release();
886 }
887 bInUpdate = TRUE;
888 }
889 }
890 }
891 --i;
892 status = 0;
893 } else if (byte == "trailer"[inside_index]) {
894 inside_index ++;
895 } else {
896 --i;
897 status = 0;
898 }
899 break;
900 case 8:
901 if (inside_index == 4) {
902 last_xref = pos + i - 4;
903 status = 1;
904 } else if (byte == "xref"[inside_index]) {
905 inside_index ++;
906 } else {
907 --i;
908 status = 0;
909 }
910 break;
911 case 9:
912 if (byte == '\r' || byte == '\n') {
913 status = 0;
914 }
915 break;
916 case 10:
917 if (byte == ')') {
918 if (depth > 0) {
919 depth--;
920 }
921 } else if (byte == '(') {
922 depth++;
923 }
924 if (!depth) {
925 status = 0;
926 }
927 break;
928 case 11:
929 if (byte == '<' && inside_index == 1) {
930 status = 12;
931 } else if (byte == '>') {
932 status = 0;
933 }
934 inside_index = 0;
935 break;
936 case 12:
937 --i;
938 status = 0;
939 break;
940 case 13:
941 if (_PDF_CharType[byte] == 'D' || _PDF_CharType[byte] == 'W') {
942 --i;
943 status = 0;
944 }
945 break;
946 case 14:
947 if (_PDF_CharType[byte] == 'W') {
948 status = 0;
949 } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') {
950 status = 0;
951 --i;
952 } else if (inside_index == 6) {
953 status = 0;
954 --i;
955 } else if (byte == "endobj"[inside_index]) {
956 inside_index++;
957 }
958 break;
959 }
960 if (bOverFlow) {
961 size = 0;
962 break;
963 }
964 }
965 pos += size;
966 }
967 if (last_xref != -1 && last_xref > last_obj) {
968 last_trailer = last_xref;
969 } else if (last_trailer == -1 || last_xref < last_obj) {
970 last_trailer = m_Syntax.m_FileLen;
971 }
972 FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
973 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
974 if (pResult == NULL) {
975 m_SortedOffset.Add(offset);
976 }
977 FX_Free(buffer);
978 return TRUE;
979 }
_GetVarInt(FX_LPCBYTE p,FX_INT32 n)980 static FX_DWORD _GetVarInt(FX_LPCBYTE p, FX_INT32 n)
981 {
982 FX_DWORD result = 0;
983 for (FX_INT32 i = 0; i < n; i ++) {
984 result = result * 256 + p[i];
985 }
986 return result;
987 }
LoadCrossRefV5(FX_FILESIZE pos,FX_FILESIZE & prev,FX_BOOL bMainXRef)988 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE pos, FX_FILESIZE& prev, FX_BOOL bMainXRef)
989 {
990 CPDF_Stream* pStream = (CPDF_Stream*)ParseIndirectObjectAt(m_pDocument, pos, 0, NULL);
991 if (!pStream) {
992 return FALSE;
993 }
994 if (m_pDocument) {
995 m_pDocument->InsertIndirectObject(pStream->m_ObjNum, pStream);
996 }
997 if (pStream->GetType() != PDFOBJ_STREAM) {
998 return FALSE;
999 }
1000 prev = pStream->GetDict()->GetInteger(FX_BSTRC("Prev"));
1001 FX_INT32 size = pStream->GetDict()->GetInteger(FX_BSTRC("Size"));
1002 if (size < 0) {
1003 pStream->Release();
1004 return FALSE;
1005 }
1006 if (bMainXRef) {
1007 m_pTrailer = (CPDF_Dictionary*)pStream->GetDict()->Clone();
1008 m_CrossRef.SetSize(size);
1009 if (m_V5Type.SetSize(size)) {
1010 FXSYS_memset32(m_V5Type.GetData(), 0, size);
1011 }
1012 } else {
1013 m_Trailers.Add((CPDF_Dictionary*)pStream->GetDict()->Clone());
1014 }
1015 CFX_DWordArray IndexArray, WidthArray;
1016 FX_DWORD nSegs = 0;
1017 CPDF_Array* pArray = pStream->GetDict()->GetArray(FX_BSTRC("Index"));
1018 if (pArray == NULL) {
1019 IndexArray.Add(0);
1020 IndexArray.Add(size);
1021 nSegs = 1;
1022 } else {
1023 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
1024 IndexArray.Add(pArray->GetInteger(i));
1025 }
1026 nSegs = pArray->GetCount() / 2;
1027 }
1028 pArray = pStream->GetDict()->GetArray(FX_BSTRC("W"));
1029 if (pArray == NULL) {
1030 pStream->Release();
1031 return FALSE;
1032 }
1033 FX_DWORD totalwidth = 0;
1034 FX_DWORD i;
1035 for (i = 0; i < pArray->GetCount(); i ++) {
1036 WidthArray.Add(pArray->GetInteger(i));
1037 if (totalwidth + WidthArray[i] < totalwidth) {
1038 pStream->Release();
1039 return FALSE;
1040 }
1041 totalwidth += WidthArray[i];
1042 }
1043 if (totalwidth == 0 || WidthArray.GetSize() < 3) {
1044 pStream->Release();
1045 return FALSE;
1046 }
1047 CPDF_StreamAcc acc;
1048 acc.LoadAllData(pStream);
1049 FX_LPCBYTE pData = acc.GetData();
1050 FX_DWORD dwTotalSize = acc.GetSize();
1051 FX_DWORD segindex = 0;
1052 for (i = 0; i < nSegs; i ++) {
1053 FX_INT32 startnum = IndexArray[i * 2];
1054 if (startnum < 0) {
1055 continue;
1056 }
1057 m_dwXrefStartObjNum = startnum;
1058 FX_DWORD count = IndexArray[i * 2 + 1];
1059 if (segindex + count < segindex || segindex + count == 0 ||
1060 (FX_DWORD)totalwidth >= UINT_MAX / (segindex + count) || (segindex + count) * (FX_DWORD)totalwidth > dwTotalSize) {
1061 continue;
1062 }
1063 FX_LPCBYTE segstart = pData + segindex * (FX_DWORD)totalwidth;
1064 if ((FX_DWORD)startnum + count < (FX_DWORD)startnum ||
1065 (FX_DWORD)startnum + count > (FX_DWORD)m_V5Type.GetSize()) {
1066 continue;
1067 }
1068 for (FX_DWORD j = 0; j < count; j ++) {
1069 FX_INT32 type = 1;
1070 FX_LPCBYTE entrystart = segstart + j * totalwidth;
1071 if (WidthArray[0]) {
1072 type = _GetVarInt(entrystart, WidthArray[0]);
1073 }
1074 if (m_V5Type[startnum + j] == 255) {
1075 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1076 m_CrossRef[startnum + j] = offset;
1077 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1078 if (pResult == NULL) {
1079 m_SortedOffset.Add(offset);
1080 }
1081 continue;
1082 }
1083 if (m_V5Type[startnum + j]) {
1084 continue;
1085 }
1086 m_V5Type[startnum + j] = type;
1087 if (type == 0) {
1088 m_CrossRef[startnum + j] = 0;
1089 } else {
1090 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1091 m_CrossRef[startnum + j] = offset;
1092 if (type == 1) {
1093 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1094 if (pResult == NULL) {
1095 m_SortedOffset.Add(offset);
1096 }
1097 } else {
1098 if (offset < 0 || offset >= m_V5Type.GetSize()) {
1099 pStream->Release();
1100 return FALSE;
1101 }
1102 m_V5Type[offset] = 255;
1103 }
1104 }
1105 }
1106 segindex += count;
1107 }
1108 pStream->Release();
1109 return TRUE;
1110 }
GetIDArray()1111 CPDF_Array* CPDF_Parser::GetIDArray()
1112 {
1113 CPDF_Object* pID = m_pTrailer->GetElement(FX_BSTRC("ID"));
1114 if (pID == NULL) {
1115 return NULL;
1116 }
1117 if (pID->GetType() == PDFOBJ_REFERENCE) {
1118 pID = ParseIndirectObject(NULL, ((CPDF_Reference*)pID)->GetRefObjNum());
1119 m_pTrailer->SetAt(FX_BSTRC("ID"), pID);
1120 }
1121 if (pID == NULL || pID->GetType() != PDFOBJ_ARRAY) {
1122 return NULL;
1123 }
1124 return (CPDF_Array*)pID;
1125 }
GetRootObjNum()1126 FX_DWORD CPDF_Parser::GetRootObjNum()
1127 {
1128 CPDF_Reference* pRef = (CPDF_Reference*)m_pTrailer->GetElement(FX_BSTRC("Root"));
1129 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1130 return 0;
1131 }
1132 return pRef->GetRefObjNum();
1133 }
GetInfoObjNum()1134 FX_DWORD CPDF_Parser::GetInfoObjNum()
1135 {
1136 CPDF_Reference* pRef = (CPDF_Reference*)m_pTrailer->GetElement(FX_BSTRC("Info"));
1137 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1138 return 0;
1139 }
1140 return pRef->GetRefObjNum();
1141 }
IsFormStream(FX_DWORD objnum,FX_BOOL & bForm)1142 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm)
1143 {
1144 bForm = FALSE;
1145 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1146 return TRUE;
1147 }
1148 if (m_V5Type[objnum] == 0) {
1149 return TRUE;
1150 }
1151 if (m_V5Type[objnum] == 2) {
1152 return TRUE;
1153 }
1154 FX_FILESIZE pos = m_CrossRef[objnum];
1155 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1156 if (pResult == NULL) {
1157 return TRUE;
1158 }
1159 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
1160 return FALSE;
1161 }
1162 FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos;
1163 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1164 m_Syntax.RestorePos(pos);
1165 bForm = m_Syntax.SearchMultiWord(FX_BSTRC("/Form\0stream"), TRUE, size) == 0;
1166 m_Syntax.RestorePos(SavedPos);
1167 return TRUE;
1168 }
ParseIndirectObject(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,PARSE_CONTEXT * pContext)1169 CPDF_Object* CPDF_Parser::ParseIndirectObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, PARSE_CONTEXT* pContext)
1170 {
1171 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1172 return NULL;
1173 }
1174 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1175 FX_FILESIZE pos = m_CrossRef[objnum];
1176 if (pos <= 0) {
1177 return NULL;
1178 }
1179 return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
1180 }
1181 if (m_V5Type[objnum] == 2) {
1182 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1183 if (pObjStream == NULL) {
1184 return NULL;
1185 }
1186 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1187 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
1188 CPDF_SyntaxParser syntax;
1189 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1190 syntax.InitParser((IFX_FileStream*)file, 0);
1191 CPDF_Object* pRet = NULL;
1192 while (n) {
1193 FX_DWORD thisnum = syntax.GetDirectNum();
1194 FX_DWORD thisoff = syntax.GetDirectNum();
1195 if (thisnum == objnum) {
1196 syntax.RestorePos(offset + thisoff);
1197 pRet = syntax.GetObject(pObjList, 0, 0, 0, pContext);
1198 break;
1199 }
1200 n --;
1201 }
1202 return pRet;
1203 }
1204 return NULL;
1205 }
GetObjectStream(FX_DWORD objnum)1206 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum)
1207 {
1208 CPDF_StreamAcc* pStreamAcc = NULL;
1209 if (m_ObjectStreamMap.Lookup((void*)(FX_UINTPTR)objnum, (void*&)pStreamAcc)) {
1210 return pStreamAcc;
1211 }
1212 const CPDF_Stream* pStream = (CPDF_Stream*)m_pDocument->GetIndirectObject(objnum);
1213 if (pStream == NULL || pStream->GetType() != PDFOBJ_STREAM) {
1214 return NULL;
1215 }
1216 pStreamAcc = FX_NEW CPDF_StreamAcc;
1217 pStreamAcc->LoadAllData(pStream);
1218 m_ObjectStreamMap.SetAt((void*)(FX_UINTPTR)objnum, pStreamAcc);
1219 return pStreamAcc;
1220 }
GetObjectSize(FX_DWORD objnum)1221 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum)
1222 {
1223 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1224 return 0;
1225 }
1226 if (m_V5Type[objnum] == 2) {
1227 objnum = (FX_DWORD)m_CrossRef[objnum];
1228 }
1229 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1230 FX_FILESIZE offset = m_CrossRef[objnum];
1231 if (offset == 0) {
1232 return 0;
1233 }
1234 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1235 if (pResult == NULL) {
1236 return 0;
1237 }
1238 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
1239 return 0;
1240 }
1241 return ((FX_FILESIZE*)pResult)[1] - offset;
1242 }
1243 return 0;
1244 }
GetIndirectBinary(FX_DWORD objnum,FX_LPBYTE & pBuffer,FX_DWORD & size)1245 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, FX_LPBYTE& pBuffer, FX_DWORD& size)
1246 {
1247 pBuffer = NULL;
1248 size = 0;
1249 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1250 return;
1251 }
1252 if (m_V5Type[objnum] == 2) {
1253 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1254 if (pObjStream == NULL) {
1255 return;
1256 }
1257 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1258 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
1259 CPDF_SyntaxParser syntax;
1260 FX_LPCBYTE pData = pObjStream->GetData();
1261 FX_DWORD totalsize = pObjStream->GetSize();
1262 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pData, (size_t)totalsize, FALSE));
1263 syntax.InitParser((IFX_FileStream*)file, 0);
1264 while (n) {
1265 FX_DWORD thisnum = syntax.GetDirectNum();
1266 FX_DWORD thisoff = syntax.GetDirectNum();
1267 if (thisnum == objnum) {
1268 if (n == 1) {
1269 size = totalsize - (thisoff + offset);
1270 } else {
1271 FX_DWORD nextnum = syntax.GetDirectNum();
1272 FX_DWORD nextoff = syntax.GetDirectNum();
1273 size = nextoff - thisoff;
1274 }
1275 pBuffer = FX_Alloc(FX_BYTE, size);
1276 FXSYS_memcpy32(pBuffer, pData + thisoff + offset, size);
1277 return;
1278 }
1279 n --;
1280 }
1281 return;
1282 }
1283 if (m_V5Type[objnum] == 1) {
1284 FX_FILESIZE pos = m_CrossRef[objnum];
1285 if (pos == 0) {
1286 return;
1287 }
1288 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1289 m_Syntax.RestorePos(pos);
1290 FX_BOOL bIsNumber;
1291 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1292 if (!bIsNumber) {
1293 m_Syntax.RestorePos(SavedPos);
1294 return;
1295 }
1296 FX_DWORD real_objnum = FXSYS_atoi(word);
1297 if (real_objnum && real_objnum != objnum) {
1298 m_Syntax.RestorePos(SavedPos);
1299 return;
1300 }
1301 word = m_Syntax.GetNextWord(bIsNumber);
1302 if (!bIsNumber) {
1303 m_Syntax.RestorePos(SavedPos);
1304 return;
1305 }
1306 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1307 m_Syntax.RestorePos(SavedPos);
1308 return;
1309 }
1310 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1311 if (pResult == NULL) {
1312 m_Syntax.RestorePos(SavedPos);
1313 return;
1314 }
1315 FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1];
1316 FX_BOOL bNextOffValid = FALSE;
1317 if (nextoff != pos) {
1318 m_Syntax.RestorePos(nextoff);
1319 word = m_Syntax.GetNextWord(bIsNumber);
1320 if (word == FX_BSTRC("xref")) {
1321 bNextOffValid = TRUE;
1322 } else if (bIsNumber) {
1323 word = m_Syntax.GetNextWord(bIsNumber);
1324 if (bIsNumber && m_Syntax.GetKeyword() == FX_BSTRC("obj")) {
1325 bNextOffValid = TRUE;
1326 }
1327 }
1328 }
1329 if (!bNextOffValid) {
1330 m_Syntax.RestorePos(pos);
1331 while (1) {
1332 if (m_Syntax.GetKeyword() == FX_BSTRC("endobj")) {
1333 break;
1334 }
1335 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) {
1336 break;
1337 }
1338 }
1339 nextoff = m_Syntax.SavePos();
1340 }
1341 size = (FX_DWORD)(nextoff - pos);
1342 pBuffer = FX_Alloc(FX_BYTE, size);
1343 m_Syntax.RestorePos(pos);
1344 m_Syntax.ReadBlock(pBuffer, size);
1345 m_Syntax.RestorePos(SavedPos);
1346 }
1347 }
ParseIndirectObjectAt(CPDF_IndirectObjects * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext)1348 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1349 PARSE_CONTEXT* pContext)
1350 {
1351 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1352 m_Syntax.RestorePos(pos);
1353 FX_BOOL bIsNumber;
1354 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1355 if (!bIsNumber) {
1356 m_Syntax.RestorePos(SavedPos);
1357 return NULL;
1358 }
1359 FX_FILESIZE objOffset = m_Syntax.SavePos();
1360 objOffset -= word.GetLength();
1361 FX_DWORD real_objnum = FXSYS_atoi(word);
1362 if (objnum && real_objnum != objnum) {
1363 m_Syntax.RestorePos(SavedPos);
1364 return NULL;
1365 }
1366 word = m_Syntax.GetNextWord(bIsNumber);
1367 if (!bIsNumber) {
1368 m_Syntax.RestorePos(SavedPos);
1369 return NULL;
1370 }
1371 FX_DWORD gennum = FXSYS_atoi(word);
1372 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1373 m_Syntax.RestorePos(SavedPos);
1374 return NULL;
1375 }
1376 CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, gennum, 0, pContext);
1377 FX_FILESIZE endOffset = m_Syntax.SavePos();
1378 CFX_ByteString bsWord = m_Syntax.GetKeyword();
1379 if (bsWord == FX_BSTRC("endobj")) {
1380 endOffset = m_Syntax.SavePos();
1381 }
1382 FX_DWORD objSize = endOffset - objOffset;
1383 m_Syntax.RestorePos(SavedPos);
1384 if (pObj && !objnum) {
1385 pObj->m_ObjNum = real_objnum;
1386 }
1387 return pObj;
1388 }
ParseIndirectObjectAtByStrict(CPDF_IndirectObjects * pObjList,FX_FILESIZE pos,FX_DWORD objnum,struct PARSE_CONTEXT * pContext,FX_FILESIZE * pResultPos)1389 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1390 struct PARSE_CONTEXT* pContext, FX_FILESIZE *pResultPos)
1391 {
1392 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1393 m_Syntax.RestorePos(pos);
1394 FX_BOOL bIsNumber;
1395 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1396 if (!bIsNumber) {
1397 m_Syntax.RestorePos(SavedPos);
1398 return NULL;
1399 }
1400 FX_DWORD real_objnum = FXSYS_atoi(word);
1401 if (objnum && real_objnum != objnum) {
1402 m_Syntax.RestorePos(SavedPos);
1403 return NULL;
1404 }
1405 word = m_Syntax.GetNextWord(bIsNumber);
1406 if (!bIsNumber) {
1407 m_Syntax.RestorePos(SavedPos);
1408 return NULL;
1409 }
1410 FX_DWORD gennum = FXSYS_atoi(word);
1411 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1412 m_Syntax.RestorePos(SavedPos);
1413 return NULL;
1414 }
1415 CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, 0, pContext);
1416 if (pResultPos) {
1417 *pResultPos = m_Syntax.m_Pos;
1418 }
1419 m_Syntax.RestorePos(SavedPos);
1420 return pObj;
1421 }
LoadTrailerV4()1422 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4()
1423 {
1424 if (m_Syntax.GetKeyword() != FX_BSTRC("trailer")) {
1425 return NULL;
1426 }
1427 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
1428 if (pObj == NULL || pObj->GetType() != PDFOBJ_DICTIONARY) {
1429 if (pObj) {
1430 pObj->Release();
1431 }
1432 return NULL;
1433 }
1434 return (CPDF_Dictionary*)pObj;
1435 }
GetPermissions(FX_BOOL bCheckRevision)1436 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision)
1437 {
1438 if (m_pSecurityHandler == NULL) {
1439 return (FX_DWORD) - 1;
1440 }
1441 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1442 if (m_pEncryptDict && m_pEncryptDict->GetString(FX_BSTRC("Filter")) == FX_BSTRC("Standard")) {
1443 dwPermission &= 0xFFFFFFFC;
1444 dwPermission |= 0xFFFFF0C0;
1445 if(bCheckRevision && m_pEncryptDict->GetInteger(FX_BSTRC("R")) == 2) {
1446 dwPermission &= 0xFFFFF0FF;
1447 }
1448 }
1449 return dwPermission;
1450 }
IsOwner()1451 FX_BOOL CPDF_Parser::IsOwner()
1452 {
1453 return m_pSecurityHandler == NULL ? TRUE : m_pSecurityHandler->IsOwner();
1454 }
SetSecurityHandler(CPDF_SecurityHandler * pSecurityHandler,FX_BOOL bForced)1455 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler, FX_BOOL bForced)
1456 {
1457 ASSERT(m_pSecurityHandler == NULL);
1458 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
1459 delete m_pSecurityHandler;
1460 m_pSecurityHandler = NULL;
1461 }
1462 m_bForceUseSecurityHandler = bForced;
1463 m_pSecurityHandler = pSecurityHandler;
1464 if (m_bForceUseSecurityHandler) {
1465 return;
1466 }
1467 m_Syntax.m_pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
1468 m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
1469 }
IsLinearizedFile(IFX_FileRead * pFileAccess,FX_DWORD offset)1470 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset)
1471 {
1472 m_Syntax.InitParser(pFileAccess, offset);
1473 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
1474 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1475 FX_BOOL bIsNumber;
1476 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1477 if (!bIsNumber) {
1478 return FALSE;
1479 }
1480 FX_DWORD objnum = FXSYS_atoi(word);
1481 word = m_Syntax.GetNextWord(bIsNumber);
1482 if (!bIsNumber) {
1483 return FALSE;
1484 }
1485 FX_DWORD gennum = FXSYS_atoi(word);
1486 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1487 m_Syntax.RestorePos(SavedPos);
1488 return FALSE;
1489 }
1490 m_pLinearized = m_Syntax.GetObject(NULL, objnum, gennum, 0);
1491 if (!m_pLinearized) {
1492 return FALSE;
1493 }
1494 if (m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
1495 m_Syntax.GetNextWord(bIsNumber);
1496 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
1497 if (!pLen) {
1498 m_pLinearized->Release();
1499 return FALSE;
1500 }
1501 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
1502 return FALSE;
1503 }
1504 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
1505 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
1506 m_dwFirstPageNo = pNo->GetInteger();
1507 }
1508 CPDF_Object *pTable = m_pLinearized->GetDict()->GetElement(FX_BSTRC("T"));
1509 if (pTable && pTable->GetType() == PDFOBJ_NUMBER) {
1510 m_LastXRefOffset = pTable->GetInteger();
1511 }
1512 return TRUE;
1513 }
1514 m_pLinearized->Release();
1515 m_pLinearized = NULL;
1516 return FALSE;
1517 }
// Starts parsing `pFileAccess`, using the first-page cross-reference section
// when the file is linearized and delegating to StartParse() when it is not.
//
// Returns a PDFPARSE_ERROR_* code, or whatever StartParse() /
// SetEncryptHandler() report on the paths that forward to them.
FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
{
    CloseParser(bReParse);
    m_bXRefStream = FALSE;
    m_LastXRefOffset = 0;
    m_bOwnFileRead = bOwnFileRead;
    FX_INT32 offset = GetHeaderOffset(pFileAccess);
    if (offset == -1) {
        return PDFPARSE_ERROR_FORMAT;
    }
    // Not linearized: detach the file from the syntax parser and run the
    // regular parse instead.
    if (!IsLinearizedFile(pFileAccess, offset)) {
        m_Syntax.m_pFileAccess = NULL;
        return StartParse(pFileAccess, bReParse, bOwnFileRead);
    }
    if (!bReParse) {
        m_pDocument = FX_NEW CPDF_Document(this);
    }
    // IsLinearizedFile() left the parser just past the linearization object;
    // the first cross-reference section is expected to start there.
    FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
    FX_BOOL bXRefRebuilt = FALSE;
    FX_BOOL bLoadV4 = FALSE;
    // Try a classic table first, then a cross-reference stream; rebuild the
    // table from scratch if neither loads.
    if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE, FALSE)) && !LoadCrossRefV5(dwFirstXRefOffset, dwFirstXRefOffset, TRUE)) {
        if (!RebuildCrossRef()) {
            return PDFPARSE_ERROR_FORMAT;
        }
        bXRefRebuilt = TRUE;
        m_LastXRefOffset = 0;
    }
    if (bLoadV4) {
        m_pTrailer = LoadTrailerV4();
        // NOTE(review): this function returns a PDFPARSE_ERROR_* code, yet the
        // two failure paths below return FALSE; if FALSE and
        // PDFPARSE_ERROR_SUCCESS share the same value, callers see success
        // here - confirm this is intended.
        if (m_pTrailer == NULL) {
            return FALSE;
        }
        FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
        if (xrefsize == 0) {
            return FALSE;
        }
        // Size both tables to the trailer's "Size" value.
        m_CrossRef.SetSize(xrefsize);
        m_V5Type.SetSize(xrefsize);
    }
    FX_DWORD dwRet = SetEncryptHandler();
    if (dwRet != PDFPARSE_ERROR_SUCCESS) {
        return dwRet;
    }
    m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
    // No root or no pages: retry once with a rebuilt cross-reference table,
    // unless the table in use is already a rebuilt one.
    if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
        if (bXRefRebuilt) {
            return PDFPARSE_ERROR_FORMAT;
        }
        ReleaseEncryptHandler();
        if (!RebuildCrossRef()) {
            return PDFPARSE_ERROR_FORMAT;
        }
        dwRet = SetEncryptHandler();
        if (dwRet != PDFPARSE_ERROR_SUCCESS) {
            return dwRet;
        }
        m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
        if (m_pDocument->GetRoot() == NULL) {
            return PDFPARSE_ERROR_FORMAT;
        }
    }
    // Other code runs FXSYS_bsearch over m_SortedOffset, which needs it sorted.
    FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
    FX_DWORD RootObjNum = GetRootObjNum();
    // Trailer without a usable "Root" reference: rebuild and look again.
    if (RootObjNum == 0) {
        ReleaseEncryptHandler();
        RebuildCrossRef();
        RootObjNum = GetRootObjNum();
        if (RootObjNum == 0) {
            return PDFPARSE_ERROR_FORMAT;
        }
        dwRet = SetEncryptHandler();
        if (dwRet != PDFPARSE_ERROR_SUCCESS) {
            return dwRet;
        }
    }
    // When the security handler reports IsMetadataEncrypted(), record the
    // object number of the root's "Metadata" reference on the syntax parser.
    if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
        CPDF_Reference* pMetadata = (CPDF_Reference*)m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
        if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
            m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
        }
    }
    return PDFPARSE_ERROR_SUCCESS;
}
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1601 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)
1602 {
1603 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1604 return FALSE;
1605 }
1606 while (xrefpos)
1607 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1608 return FALSE;
1609 }
1610 m_ObjectStreamMap.InitHashTable(101, FALSE);
1611 m_bXRefStream = TRUE;
1612 return TRUE;
1613 }
LoadLinearizedMainXRefTable()1614 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable()
1615 {
1616 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
1617 m_Syntax.m_MetadataObjnum = 0;
1618 if (m_pTrailer) {
1619 m_pTrailer->Release();
1620 m_pTrailer = NULL;
1621 }
1622 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
1623 FX_FILESIZE dwSavedPos = m_Syntax.SavePos();
1624 FX_BYTE ch = 0;
1625 FX_DWORD dwCount = 0;
1626 m_Syntax.GetNextChar(ch);
1627 FX_INT32 type = _PDF_CharType[ch];
1628 while (type == 'W') {
1629 ++dwCount;
1630 if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
1631 break;
1632 }
1633 m_Syntax.GetNextChar(ch);
1634 type = _PDF_CharType[ch];
1635 }
1636 m_LastXRefOffset += dwCount;
1637 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
1638 while (pos) {
1639 FX_LPVOID objnum;
1640 CPDF_StreamAcc* pStream;
1641 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
1642 delete pStream;
1643 }
1644 m_ObjectStreamMap.RemoveAll();
1645 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1646 m_LastXRefOffset = 0;
1647 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1648 return PDFPARSE_ERROR_FORMAT;
1649 }
1650 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_DWORD), _CompareDWord);
1651 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1652 return PDFPARSE_ERROR_SUCCESS;
1653 }
CPDF_SyntaxParser()1654 CPDF_SyntaxParser::CPDF_SyntaxParser()
1655 {
1656 m_pFileAccess = NULL;
1657 m_pCryptoHandler = NULL;
1658 m_pFileBuf = NULL;
1659 m_BufSize = CPDF_ModuleMgr::Get()->m_FileBufSize;
1660 m_pFileBuf = NULL;
1661 m_MetadataObjnum = 0;
1662 m_dwWordPos = 0;
1663 #if defined(_FPDFAPI_MINI_)
1664 m_bFileStream = TRUE;
1665 #else
1666 m_bFileStream = FALSE;
1667 #endif
1668 }
~CPDF_SyntaxParser()1669 CPDF_SyntaxParser::~CPDF_SyntaxParser()
1670 {
1671 if (m_pFileBuf) {
1672 FX_Free(m_pFileBuf);
1673 }
1674 }
GetCharAt(FX_FILESIZE pos,FX_BYTE & ch)1675 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, FX_BYTE& ch)
1676 {
1677 FX_FILESIZE save_pos = m_Pos;
1678 m_Pos = pos;
1679 FX_BOOL ret = GetNextChar(ch);
1680 m_Pos = save_pos;
1681 return ret;
1682 }
// Reads the byte at the current position into `ch` and advances m_Pos by one.
// Returns FALSE (leaving `ch` and m_Pos untouched) at end of file or when the
// underlying file read fails.
FX_BOOL CPDF_SyntaxParser::GetNextChar(FX_BYTE& ch)
{
    // m_Pos is relative to the header; `pos` is the absolute file offset.
    FX_FILESIZE pos = m_Pos + m_HeaderOffset;
    if (pos >= m_FileLen) {
        return FALSE;
    }
    // Refill when `pos` is not strictly inside (m_BufOffset, m_BufOffset + m_BufSize).
    // Note that `pos == m_BufOffset` also triggers a refill.
    if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
        // Read forward: the new window starts at `pos`...
        FX_FILESIZE read_pos = pos;
        FX_DWORD read_size = m_BufSize;
        if ((FX_FILESIZE)read_size > m_FileLen) {
            read_size = (FX_DWORD)m_FileLen;
        }
        // ...unless that would run past the end of the file, in which case the
        // window is shifted back so that it ends exactly at the file end.
        if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
            if (m_FileLen < (FX_FILESIZE)read_size) {
                read_pos = 0;
                read_size = (FX_DWORD)m_FileLen;
            } else {
                read_pos = m_FileLen - read_size;
            }
        }
        if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
            return FALSE;
        }
        m_BufOffset = read_pos;
    }
    ch = m_pFileBuf[pos - m_BufOffset];
    m_Pos ++;
    return TRUE;
}
// Reads the byte at parser position `pos` into `ch` without touching m_Pos.
// Unlike GetNextChar(), a buffer refill places the window so that it ends at
// `pos`. Returns FALSE at or beyond end of file or when the file read fails.
FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, FX_BYTE& ch)
{
    // Convert to an absolute file offset.
    pos += m_HeaderOffset;
    if (pos >= m_FileLen) {
        return FALSE;
    }
    // Same refill condition as GetNextChar().
    if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
        // Choose a window whose last byte is `pos`, clamped to the file start.
        FX_FILESIZE read_pos;
        if (pos < (FX_FILESIZE)m_BufSize) {
            read_pos = 0;
        } else {
            read_pos = pos - m_BufSize + 1;
        }
        FX_DWORD read_size = m_BufSize;
        // Keep the window inside the file: shrink it for files smaller than
        // the buffer, otherwise shift it back to end at the file end.
        if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
            if (m_FileLen < (FX_FILESIZE)read_size) {
                read_pos = 0;
                read_size = (FX_DWORD)m_FileLen;
            } else {
                read_pos = m_FileLen - read_size;
            }
        }
        if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
            return FALSE;
        }
        m_BufOffset = read_pos;
    }
    ch = m_pFileBuf[pos - m_BufOffset];
    return TRUE;
}
ReadBlock(FX_LPBYTE pBuf,FX_DWORD size)1742 FX_BOOL CPDF_SyntaxParser::ReadBlock(FX_LPBYTE pBuf, FX_DWORD size)
1743 {
1744 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
1745 return FALSE;
1746 }
1747 m_Pos += size;
1748 return TRUE;
1749 }
1750 #define MAX_WORD_BUFFER 256
// Reads the next token at the current position into m_WordBuffer /
// m_WordSize and sets m_bIsNumber.
//
// Characters are classified through _PDF_CharType; this code distinguishes
// the classes 'W' (skipped between tokens), 'D' (delimiters), 'N' and 'R'.
// Leading 'W' characters and '%' comments (up to the next CR or LF) are
// skipped. If the end of input is hit while skipping, m_WordSize is 0.
//
// Token forms produced:
//   - "/" followed by any run of 'R'/'N' characters,
//   - "<<" or ">>" when the delimiter is doubled,
//   - any other single delimiter character,
//   - otherwise a run of characters up to the next 'D' or 'W' character.
// m_bIsNumber ends up TRUE only for non-delimiter tokens made solely of 'N'
// characters (it also stays TRUE when no token was read).
void CPDF_SyntaxParser::GetNextWord()
{
    m_WordSize = 0;
    m_bIsNumber = TRUE;
    FX_BYTE ch;
    if (!GetNextChar(ch)) {
        return;
    }
    FX_BYTE type = _PDF_CharType[ch];
    // Skip 'W' characters and comments, possibly several in a row.
    while (1) {
        while (type == 'W') {
            if (!GetNextChar(ch)) {
                return;
            }
            type = _PDF_CharType[ch];
        }
        if (ch != '%') {
            break;
        }
        // Comment: discard everything up to the line terminator.
        while (1) {
            if (!GetNextChar(ch)) {
                return;
            }
            if (ch == '\r' || ch == '\n') {
                break;
            }
        }
        type = _PDF_CharType[ch];
    }
    if (type == 'D') {
        m_bIsNumber = FALSE;
        m_WordBuffer[m_WordSize++] = ch;
        if (ch == '/') {
            // Name token: collect following 'R'/'N' characters.
            while (1) {
                if (!GetNextChar(ch)) {
                    return;
                }
                type = _PDF_CharType[ch];
                if (type != 'R' && type != 'N') {
                    // Un-read the terminating character.
                    m_Pos --;
                    return;
                }
                // Characters beyond the buffer capacity are consumed but dropped.
                if (m_WordSize < MAX_WORD_BUFFER) {
                    m_WordBuffer[m_WordSize++] = ch;
                }
            }
        } else if (ch == '<') {
            // "<<" is one token; a single "<" leaves the next char unread.
            if (!GetNextChar(ch)) {
                return;
            }
            if (ch == '<') {
                m_WordBuffer[m_WordSize++] = ch;
            } else {
                m_Pos --;
            }
        } else if (ch == '>') {
            // Likewise for ">>" versus ">".
            if (!GetNextChar(ch)) {
                return;
            }
            if (ch == '>') {
                m_WordBuffer[m_WordSize++] = ch;
            } else {
                m_Pos --;
            }
        }
        return;
    }
    // Regular word: runs until the next delimiter or 'W' character.
    while (1) {
        if (m_WordSize < MAX_WORD_BUFFER) {
            m_WordBuffer[m_WordSize++] = ch;
        }
        if (type != 'N') {
            m_bIsNumber = FALSE;
        }
        if (!GetNextChar(ch)) {
            return;
        }
        type = _PDF_CharType[ch];
        if (type == 'D' || type == 'W') {
            // Leave the terminator for the next read.
            m_Pos --;
            break;
        }
    }
}
// Reads the body of a parenthesised string; the opening '(' must already
// have been consumed. Returns the decoded bytes and leaves the position just
// after the closing ')'. If the input ends first, whatever was decoded so far
// is returned.
//
// Decoder states (`status`):
//   0 - plain text; tracks nested parentheses and watches for '\'.
//   1 - character right after '\'.
//   2 - one octal digit of an escape read.
//   3 - two octal digits of an escape read.
//   4 - '\' followed by CR was seen; an immediately following LF is swallowed.
CFX_ByteString CPDF_SyntaxParser::ReadString()
{
    FX_BYTE ch;
    if (!GetNextChar(ch)) {
        return CFX_ByteString();
    }
    CFX_ByteTextBuf buf;
    FX_INT32 parlevel = 0;
    FX_INT32 status = 0, iEscCode = 0;
    while (1) {
        switch (status) {
            case 0:
                if (ch == ')') {
                    // An unbalanced ')' terminates the string.
                    if (parlevel == 0) {
                        return buf.GetByteString();
                    }
                    parlevel --;
                    buf.AppendChar(')');
                } else if (ch == '(') {
                    parlevel ++;
                    buf.AppendChar('(');
                } else if (ch == '\\') {
                    status = 1;
                } else {
                    buf.AppendChar(ch);
                }
                break;
            case 1:
                if (ch >= '0' && ch <= '7') {
                    iEscCode = ch - '0';
                    status = 2;
                    break;
                }
                if (ch == 'n') {
                    buf.AppendChar('\n');
                } else if (ch == 'r') {
                    buf.AppendChar('\r');
                } else if (ch == 't') {
                    buf.AppendChar('\t');
                } else if (ch == 'b') {
                    buf.AppendChar('\b');
                } else if (ch == 'f') {
                    buf.AppendChar('\f');
                } else if (ch == '\r') {
                    // Line continuation; may be CR LF, so check the next char.
                    status = 4;
                    break;
                } else if (ch == '\n') {
                    // Line continuation: backslash + LF produces nothing.
                } else {
                    // Any other escaped character stands for itself.
                    buf.AppendChar(ch);
                }
                status = 0;
                break;
            case 2:
                if (ch >= '0' && ch <= '7') {
                    iEscCode = iEscCode * 8 + ch - '0';
                    status = 3;
                } else {
                    // Escape ended after one digit; `continue` re-processes
                    // the current character in state 0 without reading a new one.
                    buf.AppendChar(iEscCode);
                    status = 0;
                    continue;
                }
                break;
            case 3:
                if (ch >= '0' && ch <= '7') {
                    iEscCode = iEscCode * 8 + ch - '0';
                    buf.AppendChar(iEscCode);
                    status = 0;
                } else {
                    // Escape ended after two digits; re-process this character.
                    buf.AppendChar(iEscCode);
                    status = 0;
                    continue;
                }
                break;
            case 4:
                status = 0;
                // Only an LF is swallowed; anything else is re-processed.
                if (ch != '\n') {
                    continue;
                }
                break;
        }
        if (!GetNextChar(ch)) {
            break;
        }
    }
    // Reached only when the input ran out before the closing ')'.
    GetNextChar(ch);
    return buf.GetByteString();
}
ReadHexString()1922 CFX_ByteString CPDF_SyntaxParser::ReadHexString()
1923 {
1924 FX_BYTE ch;
1925 if (!GetNextChar(ch)) {
1926 return CFX_ByteString();
1927 }
1928 CFX_BinaryBuf buf;
1929 FX_BOOL bFirst = TRUE;
1930 FX_BYTE code = 0;
1931 while (1) {
1932 if (ch == '>') {
1933 break;
1934 }
1935 if (ch >= '0' && ch <= '9') {
1936 if (bFirst) {
1937 code = (ch - '0') * 16;
1938 } else {
1939 code += ch - '0';
1940 buf.AppendByte((FX_BYTE)code);
1941 }
1942 bFirst = !bFirst;
1943 } else if (ch >= 'A' && ch <= 'F') {
1944 if (bFirst) {
1945 code = (ch - 'A' + 10) * 16;
1946 } else {
1947 code += ch - 'A' + 10;
1948 buf.AppendByte((FX_BYTE)code);
1949 }
1950 bFirst = !bFirst;
1951 } else if (ch >= 'a' && ch <= 'f') {
1952 if (bFirst) {
1953 code = (ch - 'a' + 10) * 16;
1954 } else {
1955 code += ch - 'a' + 10;
1956 buf.AppendByte((FX_BYTE)code);
1957 }
1958 bFirst = !bFirst;
1959 }
1960 if (!GetNextChar(ch)) {
1961 break;
1962 }
1963 }
1964 if (!bFirst) {
1965 buf.AppendByte((FX_BYTE)code);
1966 }
1967 return buf.GetByteString();
1968 }
ToNextLine()1969 void CPDF_SyntaxParser::ToNextLine()
1970 {
1971 FX_BYTE ch;
1972 while (1) {
1973 if (!GetNextChar(ch)) {
1974 return;
1975 }
1976 if (ch == '\n') {
1977 return;
1978 }
1979 if (ch == '\r') {
1980 GetNextChar(ch);
1981 if (ch == '\n') {
1982 return;
1983 } else {
1984 m_Pos --;
1985 return;
1986 }
1987 }
1988 }
1989 }
ToNextWord()1990 void CPDF_SyntaxParser::ToNextWord()
1991 {
1992 FX_BYTE ch;
1993 if (!GetNextChar(ch)) {
1994 return;
1995 }
1996 FX_BYTE type = _PDF_CharType[ch];
1997 while (1) {
1998 while (type == 'W') {
1999 m_dwWordPos = m_Pos;
2000 if (!GetNextChar(ch)) {
2001 return;
2002 }
2003 type = _PDF_CharType[ch];
2004 }
2005 if (ch != '%') {
2006 break;
2007 }
2008 while (1) {
2009 if (!GetNextChar(ch)) {
2010 return;
2011 }
2012 if (ch == '\r' || ch == '\n') {
2013 break;
2014 }
2015 }
2016 type = _PDF_CharType[ch];
2017 }
2018 m_Pos --;
2019 }
GetNextWord(FX_BOOL & bIsNumber)2020 CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber)
2021 {
2022 GetNextWord();
2023 bIsNumber = m_bIsNumber;
2024 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
2025 }
GetKeyword()2026 CFX_ByteString CPDF_SyntaxParser::GetKeyword()
2027 {
2028 GetNextWord();
2029 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
2030 }
GetObject(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,FX_DWORD gennum,FX_INT32 level,PARSE_CONTEXT * pContext,FX_BOOL bDecrypt)2031 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum, FX_INT32 level, PARSE_CONTEXT* pContext, FX_BOOL bDecrypt)
2032 {
2033 if (level > _PARSER_OBJECT_LEVLE_) {
2034 return NULL;
2035 }
2036 FX_FILESIZE SavedPos = m_Pos;
2037 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2038 FX_BOOL bIsNumber;
2039 CFX_ByteString word = GetNextWord(bIsNumber);
2040 CPDF_Object* pRet = NULL;
2041 if (word.GetLength() == 0) {
2042 if (bTypeOnly) {
2043 return (CPDF_Object*)PDFOBJ_INVALID;
2044 }
2045 return NULL;
2046 }
2047 FX_FILESIZE wordOffset = m_Pos - word.GetLength();
2048 if (bIsNumber) {
2049 FX_FILESIZE SavedPos = m_Pos;
2050 CFX_ByteString nextword = GetNextWord(bIsNumber);
2051 if (bIsNumber) {
2052 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2053 if (nextword2 == FX_BSTRC("R")) {
2054 FX_DWORD objnum = FXSYS_atoi(word);
2055 if (bTypeOnly) {
2056 return (CPDF_Object*)PDFOBJ_REFERENCE;
2057 }
2058 pRet = CPDF_Reference::Create(pObjList, objnum);
2059 return pRet;
2060 } else {
2061 m_Pos = SavedPos;
2062 if (bTypeOnly) {
2063 return (CPDF_Object*)PDFOBJ_NUMBER;
2064 }
2065 pRet = CPDF_Number::Create(word);
2066 return pRet;
2067 }
2068 } else {
2069 m_Pos = SavedPos;
2070 if (bTypeOnly) {
2071 return (CPDF_Object*)PDFOBJ_NUMBER;
2072 }
2073 pRet = CPDF_Number::Create(word);
2074 return pRet;
2075 }
2076 }
2077 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2078 if (bTypeOnly) {
2079 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2080 }
2081 pRet = CPDF_Boolean::Create(word == FX_BSTRC("true"));
2082 return pRet;
2083 }
2084 if (word == FX_BSTRC("null")) {
2085 if (bTypeOnly) {
2086 return (CPDF_Object*)PDFOBJ_NULL;
2087 }
2088 pRet = CPDF_Null::Create();
2089 return pRet;
2090 }
2091 if (word == FX_BSTRC("(")) {
2092 if (bTypeOnly) {
2093 return (CPDF_Object*)PDFOBJ_STRING;
2094 }
2095 FX_FILESIZE SavedPos = m_Pos - 1;
2096 CFX_ByteString str = ReadString();
2097 if (m_pCryptoHandler && bDecrypt) {
2098 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2099 }
2100 pRet = CPDF_String::Create(str, FALSE);
2101 return pRet;
2102 }
2103 if (word == FX_BSTRC("<")) {
2104 if (bTypeOnly) {
2105 return (CPDF_Object*)PDFOBJ_STRING;
2106 }
2107 FX_FILESIZE SavedPos = m_Pos - 1;
2108 CFX_ByteString str = ReadHexString();
2109 if (m_pCryptoHandler && bDecrypt) {
2110 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2111 }
2112 pRet = CPDF_String::Create(str, TRUE);
2113 return pRet;
2114 }
2115 if (word == FX_BSTRC("[")) {
2116 if (bTypeOnly) {
2117 return (CPDF_Object*)PDFOBJ_ARRAY;
2118 }
2119 CPDF_Array* pArray = CPDF_Array::Create();
2120 FX_FILESIZE firstPos = m_Pos - 1;
2121 while (1) {
2122 FX_FILESIZE SavedPos = m_Pos;
2123 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
2124 if (pObj == NULL) {
2125 return pArray;
2126 }
2127 pArray->Add(pObj);
2128 }
2129 }
2130 if (word[0] == '/') {
2131 if (bTypeOnly) {
2132 return (CPDF_Object*)PDFOBJ_NAME;
2133 }
2134 pRet = CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2135 return pRet;
2136 }
2137 if (word == FX_BSTRC("<<")) {
2138 FX_FILESIZE saveDictOffset = m_Pos - 2;
2139 FX_DWORD dwDictSize = 0;
2140 if (bTypeOnly) {
2141 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2142 }
2143 if (pContext) {
2144 pContext->m_DictStart = SavedPos;
2145 }
2146 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2147 FX_INT32 nKeys = 0;
2148 FX_FILESIZE dwSignValuePos = 0;
2149 while (1) {
2150 FX_BOOL bIsNumber;
2151 CFX_ByteString key = GetNextWord(bIsNumber);
2152 if (key.IsEmpty()) {
2153 pDict->Release();
2154 return NULL;
2155 }
2156 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
2157 if (key == FX_BSTRC(">>")) {
2158 dwDictSize = m_Pos - saveDictOffset;
2159 break;
2160 }
2161 if (key == FX_BSTRC("endobj")) {
2162 dwDictSize = m_Pos - 6 - saveDictOffset;
2163 m_Pos = SavedPos;
2164 break;
2165 }
2166 if (key[0] != '/') {
2167 continue;
2168 }
2169 nKeys ++;
2170 key = PDF_NameDecode(key);
2171 if (key == FX_BSTRC("/Contents")) {
2172 dwSignValuePos = m_Pos;
2173 }
2174 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
2175 if (pObj == NULL) {
2176 continue;
2177 }
2178 if (key.GetLength() == 1) {
2179 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2180 } else {
2181 if (nKeys < 32) {
2182 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2183 } else {
2184 pDict->AddValue(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2185 }
2186 }
2187 }
2188 if (IsSignatureDict(pDict)) {
2189 FX_FILESIZE dwSavePos = m_Pos;
2190 m_Pos = dwSignValuePos;
2191 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1, NULL, FALSE);
2192 pDict->SetAt(FX_BSTRC("Contents"), pObj);
2193 m_Pos = dwSavePos;
2194 }
2195 if (pContext) {
2196 pContext->m_DictEnd = m_Pos;
2197 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
2198 return pDict;
2199 }
2200 }
2201 FX_FILESIZE SavedPos = m_Pos;
2202 FX_BOOL bIsNumber;
2203 CFX_ByteString nextword = GetNextWord(bIsNumber);
2204 if (nextword == FX_BSTRC("stream")) {
2205 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2206 if (pStream) {
2207 return pStream;
2208 }
2209 pDict->Release();
2210 return NULL;
2211 } else {
2212 m_Pos = SavedPos;
2213 return pDict;
2214 }
2215 }
2216 if (word == FX_BSTRC(">>")) {
2217 m_Pos = SavedPos;
2218 return NULL;
2219 }
2220 if (bTypeOnly) {
2221 return (CPDF_Object*)PDFOBJ_INVALID;
2222 }
2223 return NULL;
2224 }
GetObjectByStrict(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,FX_DWORD gennum,FX_INT32 level,struct PARSE_CONTEXT * pContext)2225 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum,
2226 FX_INT32 level, struct PARSE_CONTEXT* pContext)
2227 {
2228 if (level > _PARSER_OBJECT_LEVLE_) {
2229 return NULL;
2230 }
2231 FX_FILESIZE SavedPos = m_Pos;
2232 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2233 FX_BOOL bIsNumber;
2234 CFX_ByteString word = GetNextWord(bIsNumber);
2235 if (word.GetLength() == 0) {
2236 if (bTypeOnly) {
2237 return (CPDF_Object*)PDFOBJ_INVALID;
2238 }
2239 return NULL;
2240 }
2241 if (bIsNumber) {
2242 FX_FILESIZE SavedPos = m_Pos;
2243 CFX_ByteString nextword = GetNextWord(bIsNumber);
2244 if (bIsNumber) {
2245 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2246 if (nextword2 == FX_BSTRC("R")) {
2247 FX_DWORD objnum = FXSYS_atoi(word);
2248 if (bTypeOnly) {
2249 return (CPDF_Object*)PDFOBJ_REFERENCE;
2250 }
2251 return CPDF_Reference::Create(pObjList, objnum);
2252 } else {
2253 m_Pos = SavedPos;
2254 if (bTypeOnly) {
2255 return (CPDF_Object*)PDFOBJ_NUMBER;
2256 }
2257 return CPDF_Number::Create(word);
2258 }
2259 } else {
2260 m_Pos = SavedPos;
2261 if (bTypeOnly) {
2262 return (CPDF_Object*)PDFOBJ_NUMBER;
2263 }
2264 return CPDF_Number::Create(word);
2265 }
2266 }
2267 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2268 if (bTypeOnly) {
2269 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2270 }
2271 return CPDF_Boolean::Create(word == FX_BSTRC("true"));
2272 }
2273 if (word == FX_BSTRC("null")) {
2274 if (bTypeOnly) {
2275 return (CPDF_Object*)PDFOBJ_NULL;
2276 }
2277 return CPDF_Null::Create();
2278 }
2279 if (word == FX_BSTRC("(")) {
2280 if (bTypeOnly) {
2281 return (CPDF_Object*)PDFOBJ_STRING;
2282 }
2283 CFX_ByteString str = ReadString();
2284 if (m_pCryptoHandler) {
2285 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2286 }
2287 return CPDF_String::Create(str, FALSE);
2288 }
2289 if (word == FX_BSTRC("<")) {
2290 if (bTypeOnly) {
2291 return (CPDF_Object*)PDFOBJ_STRING;
2292 }
2293 CFX_ByteString str = ReadHexString();
2294 if (m_pCryptoHandler) {
2295 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2296 }
2297 return CPDF_String::Create(str, TRUE);
2298 }
2299 if (word == FX_BSTRC("[")) {
2300 if (bTypeOnly) {
2301 return (CPDF_Object*)PDFOBJ_ARRAY;
2302 }
2303 CPDF_Array* pArray = CPDF_Array::Create();
2304 while (1) {
2305 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
2306 if (pObj == NULL) {
2307 if (m_WordBuffer[0] == ']') {
2308 return pArray;
2309 }
2310 pArray->Release();
2311 return NULL;
2312 }
2313 pArray->Add(pObj);
2314 }
2315 }
2316 if (word[0] == '/') {
2317 if (bTypeOnly) {
2318 return (CPDF_Object*)PDFOBJ_NAME;
2319 }
2320 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2321 }
2322 if (word == FX_BSTRC("<<")) {
2323 if (bTypeOnly) {
2324 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2325 }
2326 if (pContext) {
2327 pContext->m_DictStart = SavedPos;
2328 }
2329 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2330 while (1) {
2331 FX_BOOL bIsNumber;
2332 FX_FILESIZE SavedPos = m_Pos;
2333 CFX_ByteString key = GetNextWord(bIsNumber);
2334 if (key.IsEmpty()) {
2335 pDict->Release();
2336 return NULL;
2337 }
2338 if (key == FX_BSTRC(">>")) {
2339 break;
2340 }
2341 if (key == FX_BSTRC("endobj")) {
2342 m_Pos = SavedPos;
2343 break;
2344 }
2345 if (key[0] != '/') {
2346 continue;
2347 }
2348 key = PDF_NameDecode(key);
2349 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, level + 1);
2350 if (pObj == NULL) {
2351 pDict->Release();
2352 FX_BYTE ch;
2353 while (1) {
2354 if (!GetNextChar(ch)) {
2355 break;
2356 }
2357 if (ch == 0x0A || ch == 0x0D) {
2358 break;
2359 }
2360 }
2361 return NULL;
2362 }
2363 if (key.GetLength() == 1) {
2364 pDict->SetAt(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2365 } else {
2366 pDict->AddValue(CFX_ByteStringC(((FX_LPCSTR)key) + 1, key.GetLength() - 1), pObj);
2367 }
2368 }
2369 if (pContext) {
2370 pContext->m_DictEnd = m_Pos;
2371 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
2372 return pDict;
2373 }
2374 }
2375 FX_FILESIZE SavedPos = m_Pos;
2376 FX_BOOL bIsNumber;
2377 CFX_ByteString nextword = GetNextWord(bIsNumber);
2378 if (nextword == FX_BSTRC("stream")) {
2379 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2380 if (pStream) {
2381 return pStream;
2382 }
2383 pDict->Release();
2384 return NULL;
2385 } else {
2386 m_Pos = SavedPos;
2387 return pDict;
2388 }
2389 }
2390 if (word == FX_BSTRC(">>")) {
2391 m_Pos = SavedPos;
2392 return NULL;
2393 }
2394 if (bTypeOnly) {
2395 return (CPDF_Object*)PDFOBJ_INVALID;
2396 }
2397 return NULL;
2398 }
ReadStream(CPDF_Dictionary * pDict,PARSE_CONTEXT * pContext,FX_DWORD objnum,FX_DWORD gennum)2399 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, PARSE_CONTEXT* pContext,
2400 FX_DWORD objnum, FX_DWORD gennum)
2401 {
2402 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
2403 FX_DWORD len = 0;
2404 if (pLenObj && (pLenObj->GetType() != PDFOBJ_REFERENCE ||
2405 ((((CPDF_Reference*)pLenObj)->GetObjList() != NULL) &&
2406 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
2407 FX_FILESIZE pos = m_Pos;
2408 if (pLenObj) {
2409 len = pLenObj->GetInteger();
2410 }
2411 m_Pos = pos;
2412 if (len > 0x40000000) {
2413 return NULL;
2414 }
2415 }
2416 ToNextLine();
2417 FX_FILESIZE StreamStartPos = m_Pos;
2418 if (pContext) {
2419 pContext->m_DataStart = m_Pos;
2420 }
2421 m_Pos += len;
2422 CPDF_CryptoHandler* pCryptoHandler = objnum == (FX_DWORD)m_MetadataObjnum ? NULL : m_pCryptoHandler;
2423 if (pCryptoHandler == NULL) {
2424 FX_FILESIZE SavedPos = m_Pos;
2425 GetNextWord();
2426 if (m_WordSize < 9 || FXSYS_memcmp32(m_WordBuffer, "endstream", 9)) {
2427 m_Pos = StreamStartPos;
2428 FX_FILESIZE offset = FindTag(FX_BSTRC("endstream"), 0);
2429 if (offset >= 0) {
2430 FX_FILESIZE curPos = m_Pos;
2431 m_Pos = StreamStartPos;
2432 FX_FILESIZE endobjOffset = FindTag(FX_BSTRC("endobj"), 0);
2433 if (endobjOffset < offset && endobjOffset >= 0) {
2434 offset = endobjOffset;
2435 } else {
2436 m_Pos = curPos;
2437 }
2438 FX_BYTE byte1, byte2;
2439 GetCharAt(StreamStartPos + offset - 1, byte1);
2440 GetCharAt(StreamStartPos + offset - 2, byte2);
2441 if (byte1 == 0x0a && byte2 == 0x0d) {
2442 len -= 2;
2443 } else if (byte1 == 0x0a || byte1 == 0x0d) {
2444 len --;
2445 }
2446 len = (FX_DWORD)offset;
2447 pDict->SetAtInteger(FX_BSTRC("Length"), len);
2448 } else {
2449 m_Pos = StreamStartPos;
2450 if (FindTag(FX_BSTRC("endobj"), 0) < 0) {
2451 return NULL;
2452 }
2453 }
2454 }
2455 }
2456 m_Pos = StreamStartPos;
2457 CPDF_Stream* pStream;
2458 #if defined(_FPDFAPI_MINI_) && !defined(_FXCORE_FEATURE_ALL_)
2459 pStream = FX_NEW CPDF_Stream(m_pFileAccess, pCryptoHandler, m_HeaderOffset + m_Pos, len, pDict, gennum);
2460 m_Pos += len;
2461 #else
2462 FX_LPBYTE pData = FX_Alloc(FX_BYTE, len);
2463 if (!pData) {
2464 return NULL;
2465 }
2466 ReadBlock(pData, len);
2467 if (pCryptoHandler) {
2468 CFX_BinaryBuf dest_buf;
2469 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
2470 FX_LPVOID context = pCryptoHandler->DecryptStart(objnum, gennum);
2471 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
2472 pCryptoHandler->DecryptFinish(context, dest_buf);
2473 FX_Free(pData);
2474 pData = dest_buf.GetBuffer();
2475 len = dest_buf.GetSize();
2476 dest_buf.DetachBuffer();
2477 }
2478 pStream = FX_NEW CPDF_Stream(pData, len, pDict);
2479 #endif
2480 if (pContext) {
2481 pContext->m_DataEnd = pContext->m_DataStart + len;
2482 }
2483 StreamStartPos = m_Pos;
2484 GetNextWord();
2485 if (m_WordSize == 6 && 0 == FXSYS_memcmp32(m_WordBuffer, "endobj", 6)) {
2486 m_Pos = StreamStartPos;
2487 }
2488 return pStream;
2489 }
InitParser(IFX_FileRead * pFileAccess,FX_DWORD HeaderOffset)2490 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset)
2491 {
2492 if (m_pFileBuf) {
2493 FX_Free(m_pFileBuf);
2494 m_pFileBuf = NULL;
2495 }
2496 m_pFileBuf = FX_Alloc(FX_BYTE, m_BufSize);
2497 m_HeaderOffset = HeaderOffset;
2498 m_FileLen = pFileAccess->GetSize();
2499 m_Pos = 0;
2500 m_pFileAccess = pFileAccess;
2501 m_BufOffset = 0;
2502 pFileAccess->ReadBlock(m_pFileBuf, 0, (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
2503 }
GetDirectNum()2504 FX_INT32 CPDF_SyntaxParser::GetDirectNum()
2505 {
2506 GetNextWord();
2507 if (!m_bIsNumber) {
2508 return 0;
2509 }
2510 m_WordBuffer[m_WordSize] = 0;
2511 return FXSYS_atoi((FX_LPCSTR)m_WordBuffer);
2512 }
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,FX_LPCBYTE tag,FX_DWORD taglen)2513 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, FX_LPCBYTE tag, FX_DWORD taglen)
2514 {
2515 FX_BYTE type = _PDF_CharType[tag[0]];
2516 FX_BOOL bCheckLeft = type != 'D' && type != 'W';
2517 type = _PDF_CharType[tag[taglen - 1]];
2518 FX_BOOL bCheckRight = type != 'D' || type != 'W';
2519 FX_BYTE ch;
2520 if (bCheckRight && startpos + (FX_INT32)taglen <= limit && GetCharAt(startpos + (FX_INT32)taglen, ch)) {
2521 FX_BYTE type = _PDF_CharType[ch];
2522 if (type == 'N' || type == 'R') {
2523 return FALSE;
2524 }
2525 }
2526 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2527 FX_BYTE type = _PDF_CharType[ch];
2528 if (type == 'N' || type == 'R') {
2529 return FALSE;
2530 }
2531 }
2532 return TRUE;
2533 }
SearchWord(FX_BSTR tag,FX_BOOL bWholeWord,FX_BOOL bForward,FX_FILESIZE limit)2534 FX_BOOL CPDF_SyntaxParser::SearchWord(FX_BSTR tag, FX_BOOL bWholeWord, FX_BOOL bForward, FX_FILESIZE limit)
2535 {
2536 FX_INT32 taglen = tag.GetLength();
2537 if (taglen == 0) {
2538 return FALSE;
2539 }
2540 FX_FILESIZE pos = m_Pos;
2541 FX_INT32 offset = 0;
2542 if (!bForward) {
2543 offset = taglen - 1;
2544 }
2545 FX_LPCBYTE tag_data = tag;
2546 FX_BYTE byte;
2547 while (1) {
2548 if (bForward) {
2549 if (limit) {
2550 if (pos >= m_Pos + limit) {
2551 return FALSE;
2552 }
2553 }
2554 if (!GetCharAt(pos, byte)) {
2555 return FALSE;
2556 }
2557 } else {
2558 if (limit) {
2559 if (pos <= m_Pos - limit) {
2560 return FALSE;
2561 }
2562 }
2563 if (!GetCharAtBackward(pos, byte)) {
2564 return FALSE;
2565 }
2566 }
2567 if (byte == tag_data[offset]) {
2568 if (bForward) {
2569 offset ++;
2570 if (offset < taglen) {
2571 pos ++;
2572 continue;
2573 }
2574 } else {
2575 offset --;
2576 if (offset >= 0) {
2577 pos --;
2578 continue;
2579 }
2580 }
2581 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
2582 if (!bWholeWord || IsWholeWord(startpos, limit, tag, taglen)) {
2583 m_Pos = startpos;
2584 return TRUE;
2585 }
2586 }
2587 if (bForward) {
2588 offset = byte == tag_data[0] ? 1 : 0;
2589 pos ++;
2590 } else {
2591 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
2592 pos --;
2593 }
2594 if (pos < 0) {
2595 return FALSE;
2596 }
2597 }
2598 return FALSE;
2599 }
2600 struct _SearchTagRecord {
2601 FX_LPCBYTE m_pTag;
2602 FX_DWORD m_Len;
2603 FX_DWORD m_Offset;
2604 };
SearchMultiWord(FX_BSTR tags,FX_BOOL bWholeWord,FX_FILESIZE limit)2605 FX_INT32 CPDF_SyntaxParser::SearchMultiWord(FX_BSTR tags, FX_BOOL bWholeWord, FX_FILESIZE limit)
2606 {
2607 FX_INT32 ntags = 1, i;
2608 for (i = 0; i < tags.GetLength(); i ++)
2609 if (tags[i] == 0) {
2610 ntags ++;
2611 }
2612 _SearchTagRecord* pPatterns = FX_Alloc(_SearchTagRecord, ntags);
2613 FX_DWORD start = 0, itag = 0, max_len = 0;
2614 for (i = 0; i <= tags.GetLength(); i ++) {
2615 if (tags[i] == 0) {
2616 FX_DWORD len = i - start;
2617 if (len > max_len) {
2618 max_len = len;
2619 }
2620 pPatterns[itag].m_pTag = tags.GetPtr() + start;
2621 pPatterns[itag].m_Len = len;
2622 pPatterns[itag].m_Offset = 0;
2623 start = i + 1;
2624 itag ++;
2625 }
2626 }
2627 FX_FILESIZE pos = m_Pos;
2628 FX_BYTE byte;
2629 GetCharAt(pos++, byte);
2630 FX_INT32 found = -1;
2631 while (1) {
2632 for (i = 0; i < ntags; i ++) {
2633 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {
2634 pPatterns[i].m_Offset ++;
2635 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
2636 if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, pPatterns[i].m_Len)) {
2637 found = i;
2638 goto end;
2639 } else {
2640 if (pPatterns[i].m_pTag[0] == byte) {
2641 pPatterns[i].m_Offset = 1;
2642 } else {
2643 pPatterns[i].m_Offset = 0;
2644 }
2645 }
2646 }
2647 } else {
2648 if (pPatterns[i].m_pTag[0] == byte) {
2649 pPatterns[i].m_Offset = 1;
2650 } else {
2651 pPatterns[i].m_Offset = 0;
2652 }
2653 }
2654 }
2655 if (limit && pos >= m_Pos + limit) {
2656 goto end;
2657 }
2658 if (!GetCharAt(pos, byte)) {
2659 goto end;
2660 }
2661 pos ++;
2662 }
2663 end:
2664 FX_Free(pPatterns);
2665 return found;
2666 }
FindTag(FX_BSTR tag,FX_FILESIZE limit)2667 FX_FILESIZE CPDF_SyntaxParser::FindTag(FX_BSTR tag, FX_FILESIZE limit)
2668 {
2669 FX_INT32 taglen = tag.GetLength();
2670 FX_INT32 match = 0;
2671 limit += m_Pos;
2672 FX_FILESIZE startpos = m_Pos;
2673 while (1) {
2674 FX_BYTE ch;
2675 if (!GetNextChar(ch)) {
2676 return -1;
2677 }
2678 if (ch == tag[match]) {
2679 match ++;
2680 if (match == taglen) {
2681 return m_Pos - startpos - taglen;
2682 }
2683 } else {
2684 match = ch == tag[0] ? 1 : 0;
2685 }
2686 if (limit && m_Pos == limit) {
2687 return -1;
2688 }
2689 }
2690 return -1;
2691 }
GetBinary(FX_BYTE * buffer,FX_DWORD size)2692 void CPDF_SyntaxParser::GetBinary(FX_BYTE* buffer, FX_DWORD size)
2693 {
2694 FX_DWORD offset = 0;
2695 FX_BYTE ch;
2696 while (1) {
2697 if (!GetNextChar(ch)) {
2698 return;
2699 }
2700 buffer[offset++] = ch;
2701 if (offset == size) {
2702 break;
2703 }
2704 }
2705 }
CPDF_DataAvail(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead)2706 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead)
2707 {
2708 m_pFileAvail = pFileAvail;
2709 m_pFileRead = pFileRead;
2710 m_Pos = 0;
2711 m_dwFileLen = 0;
2712 if (m_pFileRead) {
2713 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2714 }
2715 m_dwCurrentOffset = 0;
2716 m_WordSize = 0;
2717 m_dwXRefOffset = 0;
2718 m_bufferOffset = 0;
2719 m_dwFirstPageNo = 0;
2720 m_bufferSize = 0;
2721 m_PagesObjNum = 0;
2722 m_dwCurrentXRefSteam = 0;
2723 m_dwAcroFormObjNum = 0;
2724 m_dwInfoObjNum = 0;
2725 m_pDocument = 0;
2726 m_dwEncryptObjNum = 0;
2727 m_dwPrevXRefOffset = 0;
2728 m_dwLastXRefOffset = 0;
2729 m_bDocAvail = FALSE;
2730 m_bMainXRefLoad = FALSE;
2731 m_bDocAvail = FALSE;
2732 m_bLinearized = FALSE;
2733 m_bPagesLoad = FALSE;
2734 m_bPagesTreeLoad = FALSE;
2735 m_bMainXRefLoadedOK = FALSE;
2736 m_bAnnotsLoad = FALSE;
2737 m_bHaveAcroForm = FALSE;
2738 m_bAcroFormLoad = FALSE;
2739 m_bPageLoadedOK = FALSE;
2740 m_bNeedDownLoadResource = FALSE;
2741 m_bLinearizedFormParamLoad = FALSE;
2742 m_pLinearized = NULL;
2743 m_pRoot = NULL;
2744 m_pTrailer = NULL;
2745 m_pCurrentParser = NULL;
2746 m_pAcroForm = NULL;
2747 m_pPageDict = NULL;
2748 m_pPageResource = NULL;
2749 m_pageMapCheckState = NULL;
2750 m_docStatus = PDF_DATAAVAIL_HEADER;
2751 m_parser.m_bOwnFileRead = FALSE;
2752 m_bTotalLoadPageTree = FALSE;
2753 m_bCurPageDictLoadOK = FALSE;
2754 m_bLinearedDataOK = FALSE;
2755 m_pagesLoadState = NULL;
2756 }
~CPDF_DataAvail()2757 CPDF_DataAvail::~CPDF_DataAvail()
2758 {
2759 if (m_pLinearized) {
2760 m_pLinearized->Release();
2761 }
2762 if (m_pRoot) {
2763 m_pRoot->Release();
2764 }
2765 if (m_pTrailer) {
2766 m_pTrailer->Release();
2767 }
2768 if (m_pageMapCheckState) {
2769 delete m_pageMapCheckState;
2770 }
2771 if (m_pagesLoadState) {
2772 delete m_pagesLoadState;
2773 }
2774 FX_INT32 i = 0;
2775 FX_INT32 iSize = m_arrayAcroforms.GetSize();
2776 for (i = 0; i < iSize; ++i) {
2777 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
2778 }
2779 }
SetDocument(CPDF_Document * pDoc)2780 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc)
2781 {
2782 m_pDocument = pDoc;
2783 }
GetObjectSize(FX_DWORD objnum,FX_FILESIZE & offset)2784 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset)
2785 {
2786 CPDF_Parser *pParser = (CPDF_Parser *)(m_pDocument->GetParser());
2787 if (pParser == NULL) {
2788 return 0;
2789 }
2790 if (objnum >= (FX_DWORD)pParser->m_CrossRef.GetSize()) {
2791 return 0;
2792 }
2793 if (pParser->m_V5Type[objnum] == 2) {
2794 objnum = (FX_DWORD)pParser->m_CrossRef[objnum];
2795 }
2796 if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) {
2797 offset = pParser->m_CrossRef[objnum];
2798 if (offset == 0) {
2799 return 0;
2800 }
2801 FX_LPVOID pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(), pParser->m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
2802 if (pResult == NULL) {
2803 return 0;
2804 }
2805 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)pParser->m_SortedOffset.GetData() == pParser->m_SortedOffset.GetSize() - 1) {
2806 return 0;
2807 }
2808 return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset);
2809 }
2810 return 0;
2811 }
IsObjectsAvail(CFX_PtrArray & obj_array,FX_BOOL bParsePage,IFX_DownloadHints * pHints,CFX_PtrArray & ret_array)2812 FX_BOOL CPDF_DataAvail::IsObjectsAvail(CFX_PtrArray& obj_array, FX_BOOL bParsePage, IFX_DownloadHints* pHints, CFX_PtrArray &ret_array)
2813 {
2814 if (!obj_array.GetSize()) {
2815 return TRUE;
2816 }
2817 FX_DWORD count = 0;
2818 CFX_PtrArray new_obj_array;
2819 FX_INT32 i = 0;
2820 for (i = 0; i < obj_array.GetSize(); i++) {
2821 CPDF_Object *pObj = (CPDF_Object *)obj_array[i];
2822 if (!pObj) {
2823 continue;
2824 }
2825 FX_INT32 type = pObj->GetType();
2826 switch (type) {
2827 case PDFOBJ_ARRAY: {
2828 CPDF_Array *pArray = pObj->GetArray();
2829 for (FX_DWORD k = 0; k < pArray->GetCount(); k++) {
2830 new_obj_array.Add(pArray->GetElement(k));
2831 }
2832 }
2833 break;
2834 case PDFOBJ_STREAM:
2835 pObj = pObj->GetDict();
2836 case PDFOBJ_DICTIONARY: {
2837 CPDF_Dictionary *pDict = pObj->GetDict();
2838 if (pDict->GetString("Type") == "Page" && !bParsePage) {
2839 continue;
2840 }
2841 FX_POSITION pos = pDict->GetStartPos();
2842 while (pos) {
2843 CPDF_Object *value;
2844 CFX_ByteString key;
2845 value = pDict->GetNextElement(pos, key);
2846 if (key != "Parent") {
2847 new_obj_array.Add(value);
2848 }
2849 }
2850 }
2851 break;
2852 case PDFOBJ_REFERENCE: {
2853 CPDF_Reference *pRef = (CPDF_Reference*)pObj;
2854 FX_DWORD dwNum = pRef->GetRefObjNum();
2855 FX_FILESIZE offset;
2856 FX_DWORD size = GetObjectSize(pRef->GetRefObjNum(), offset);
2857 if (!size) {
2858 break;
2859 }
2860 size = (FX_DWORD)((FX_FILESIZE)(offset + size + 512) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
2861 if (!m_pFileAvail->IsDataAvail(offset, size)) {
2862 pHints->AddSegment(offset, size);
2863 ret_array.Add(pObj);
2864 count++;
2865 } else if (!m_objnum_array.Find(dwNum)) {
2866 m_objnum_array.AddObjNum(dwNum);
2867 CPDF_Object *pReferred = m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), NULL);
2868 if (pReferred) {
2869 new_obj_array.Add(pReferred);
2870 }
2871 }
2872 }
2873 break;
2874 }
2875 }
2876 if (count > 0) {
2877 FX_INT32 iSize = new_obj_array.GetSize();
2878 for (i = 0; i < iSize; ++i) {
2879 CPDF_Object *pObj = (CPDF_Object *)new_obj_array[i];
2880 FX_INT32 type = pObj->GetType();
2881 if (type == PDFOBJ_REFERENCE) {
2882 CPDF_Reference *pRef = (CPDF_Reference *)pObj;
2883 FX_DWORD dwNum = pRef->GetRefObjNum();
2884 if (!m_objnum_array.Find(dwNum)) {
2885 ret_array.Add(pObj);
2886 }
2887 } else {
2888 ret_array.Add(pObj);
2889 }
2890 }
2891 return FALSE;
2892 }
2893 obj_array.RemoveAll();
2894 obj_array.Append(new_obj_array);
2895 return IsObjectsAvail(obj_array, FALSE, pHints, ret_array);
2896 }
IsDocAvail(IFX_DownloadHints * pHints)2897 FX_BOOL CPDF_DataAvail::IsDocAvail(IFX_DownloadHints* pHints)
2898 {
2899 if (!m_dwFileLen && m_pFileRead) {
2900 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2901 if (!m_dwFileLen) {
2902 return TRUE;
2903 }
2904 }
2905 while (!m_bDocAvail) {
2906 if (!CheckDocStatus(pHints)) {
2907 return FALSE;
2908 }
2909 }
2910 return TRUE;
2911 }
CheckAcroFormSubObject(IFX_DownloadHints * pHints)2912 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints)
2913 {
2914 if (!m_objs_array.GetSize()) {
2915 m_objs_array.RemoveAll();
2916 m_objnum_array.RemoveAll();
2917 CFX_PtrArray obj_array;
2918 obj_array.Append(m_arrayAcroforms);
2919 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
2920 if (bRet) {
2921 m_objs_array.RemoveAll();
2922 }
2923 return bRet;
2924 } else {
2925 CFX_PtrArray new_objs_array;
2926 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
2927 if (bRet) {
2928 FX_INT32 iSize = m_arrayAcroforms.GetSize();
2929 for (FX_INT32 i = 0; i < iSize; ++i) {
2930 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
2931 }
2932 m_arrayAcroforms.RemoveAll();
2933 } else {
2934 m_objs_array.RemoveAll();
2935 m_objs_array.Append(new_objs_array);
2936 }
2937 return bRet;
2938 }
2939 }
CheckAcroForm(IFX_DownloadHints * pHints)2940 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints)
2941 {
2942 FX_BOOL bExist = FALSE;
2943 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist);
2944 if (!bExist) {
2945 m_docStatus = PDF_DATAAVAIL_PAGETREE;
2946 return TRUE;
2947 }
2948 if (!m_pAcroForm) {
2949 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
2950 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
2951 return TRUE;
2952 }
2953 return FALSE;
2954 }
2955 m_arrayAcroforms.Add(m_pAcroForm);
2956 m_docStatus = PDF_DATAAVAIL_PAGETREE;
2957 return TRUE;
2958 }
CheckDocStatus(IFX_DownloadHints * pHints)2959 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints *pHints)
2960 {
2961 switch (m_docStatus) {
2962 case PDF_DATAAVAIL_HEADER:
2963 return CheckHeader(pHints);
2964 case PDF_DATAAVAIL_FIRSTPAGE:
2965 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
2966 return CheckFirstPage(pHints);
2967 case PDF_DATAAVAIL_END:
2968 return CheckEnd(pHints);
2969 case PDF_DATAAVAIL_CROSSREF:
2970 return CheckCrossRef(pHints);
2971 case PDF_DATAAVAIL_CROSSREF_ITEM:
2972 return CheckCrossRefItem(pHints);
2973 case PDF_DATAAVAIL_CROSSREF_STREAM:
2974 return CheckAllCrossRefStream(pHints);
2975 case PDF_DATAAVAIL_TRAILER:
2976 return CheckTrailer(pHints);
2977 case PDF_DATAAVAIL_TRAILER_APPEND:
2978 return CheckTrailerAppend(pHints);
2979 case PDF_DATAAVAIL_LOADALLCRSOSSREF:
2980 return LoadAllXref(pHints);
2981 case PDF_DATAAVAIL_LOADALLFILE:
2982 return LoadAllFile(pHints);
2983 case PDF_DATAAVAIL_ROOT:
2984 return CheckRoot(pHints);
2985 case PDF_DATAAVAIL_INFO:
2986 return CheckInfo(pHints);
2987 case PDF_DATAAVAIL_ACROFORM:
2988 return CheckAcroForm(pHints);
2989 case PDF_DATAAVAIL_PAGETREE:
2990 if (m_bTotalLoadPageTree) {
2991 return CheckPages(pHints);
2992 } else {
2993 return LoadDocPages(pHints);
2994 }
2995 case PDF_DATAAVAIL_PAGE:
2996 if (m_bTotalLoadPageTree) {
2997 return CheckPage(pHints);
2998 } else {
2999 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
3000 return TRUE;
3001 }
3002 case PDF_DATAAVAIL_ERROR:
3003 return LoadAllFile(pHints);
3004 case PDF_DATAAVAIL_PAGE_LATERLOAD:
3005 m_docStatus = PDF_DATAAVAIL_PAGE;
3006 default:
3007 m_bDocAvail = TRUE;
3008 return TRUE;
3009 }
3010 }
CheckPageStatus(IFX_DownloadHints * pHints)3011 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints)
3012 {
3013 switch (m_docStatus) {
3014 case PDF_DATAAVAIL_PAGETREE:
3015 return CheckPages(pHints);
3016 case PDF_DATAAVAIL_PAGE:
3017 return CheckPage(pHints);
3018 case PDF_DATAAVAIL_ERROR:
3019 return LoadAllFile(pHints);
3020 default:
3021 m_bPagesTreeLoad = TRUE;
3022 m_bPagesLoad = TRUE;
3023 return TRUE;
3024 }
3025 }
LoadAllFile(IFX_DownloadHints * pHints)3026 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints)
3027 {
3028 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3029 m_docStatus = PDF_DATAAVAIL_DONE;
3030 return TRUE;
3031 }
3032 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
3033 return FALSE;
3034 }
LoadAllXref(IFX_DownloadHints * pHints)3035 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints)
3036 {
3037 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
3038 m_parser.m_bOwnFileRead = FALSE;
3039 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3040 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3041 return FALSE;
3042 }
3043 FXSYS_qsort(m_parser.m_SortedOffset.GetData(), m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
3044 m_dwRootObjNum = m_parser.GetRootObjNum();
3045 m_dwInfoObjNum = m_parser.GetInfoObjNum();
3046 m_pCurrentParser = &m_parser;
3047 m_docStatus = PDF_DATAAVAIL_ROOT;
3048 return TRUE;
3049 }
GetObject(FX_DWORD objnum,IFX_DownloadHints * pHints,FX_BOOL * pExistInFile)3050 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL *pExistInFile)
3051 {
3052 CPDF_Object *pRet = NULL;
3053 if (pExistInFile) {
3054 *pExistInFile = TRUE;
3055 }
3056 if (m_pDocument == NULL) {
3057 FX_FILESIZE offset = m_parser.GetObjectOffset(objnum);
3058 if (offset < 0) {
3059 *pExistInFile = FALSE;
3060 return NULL;
3061 }
3062 FX_DWORD size = (FX_DWORD)m_parser.GetObjectSize(objnum);
3063 size = (FX_DWORD)(((FX_FILESIZE)(offset + size + 512)) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
3064 if (!m_pFileAvail->IsDataAvail(offset, size)) {
3065 pHints->AddSegment(offset, size);
3066 return NULL;
3067 }
3068 pRet = m_parser.ParseIndirectObject(NULL, objnum);
3069 if (!pRet && pExistInFile) {
3070 *pExistInFile = FALSE;
3071 }
3072 return pRet;
3073 }
3074 FX_FILESIZE offset;
3075 FX_DWORD size = GetObjectSize(objnum, offset);
3076 size = (FX_DWORD)((FX_FILESIZE)(offset + size + 512) > m_dwFileLen ? m_dwFileLen - offset : size + 512);
3077 if (!m_pFileAvail->IsDataAvail(offset, size)) {
3078 pHints->AddSegment(offset, size);
3079 return NULL;
3080 }
3081 CPDF_Parser *pParser = (CPDF_Parser *)(m_pDocument->GetParser());
3082 pRet = pParser->ParseIndirectObject(NULL, objnum, NULL);
3083 if (!pRet && pExistInFile) {
3084 *pExistInFile = FALSE;
3085 }
3086 return pRet;
3087 }
CheckInfo(IFX_DownloadHints * pHints)3088 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints)
3089 {
3090 FX_BOOL bExist = FALSE;
3091 CPDF_Object *pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist);
3092 if (!bExist) {
3093 if (m_bHaveAcroForm) {
3094 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3095 } else {
3096 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3097 }
3098 return TRUE;
3099 }
3100 if (!pInfo) {
3101 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3102 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3103 return TRUE;
3104 }
3105 if (m_Pos == m_dwFileLen) {
3106 m_docStatus = PDF_DATAAVAIL_ERROR;
3107 }
3108 return FALSE;
3109 }
3110 if (pInfo) {
3111 pInfo->Release();
3112 }
3113 if (m_bHaveAcroForm) {
3114 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3115 } else {
3116 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3117 }
3118 return TRUE;
3119 }
CheckRoot(IFX_DownloadHints * pHints)3120 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints)
3121 {
3122 FX_BOOL bExist = FALSE;
3123 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
3124 if (!bExist) {
3125 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3126 return TRUE;
3127 }
3128 if (!m_pRoot) {
3129 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3130 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3131 return TRUE;
3132 }
3133 return FALSE;
3134 }
3135 CPDF_Reference* pRef = (CPDF_Reference*)m_pRoot->GetDict()->GetElement(FX_BSTRC("Pages"));
3136 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3137 m_docStatus = PDF_DATAAVAIL_ERROR;
3138 return FALSE;
3139 }
3140 m_PagesObjNum = pRef->GetRefObjNum();
3141 CPDF_Reference* pAcroFormRef = (CPDF_Reference*)m_pRoot->GetDict()->GetElement(FX_BSTRC("AcroForm"));
3142 if (pAcroFormRef && pAcroFormRef->GetType() == PDFOBJ_REFERENCE) {
3143 m_bHaveAcroForm = TRUE;
3144 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
3145 }
3146 if (m_dwInfoObjNum) {
3147 m_docStatus = PDF_DATAAVAIL_INFO;
3148 } else {
3149 if (m_bHaveAcroForm) {
3150 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3151 } else {
3152 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3153 }
3154 }
3155 return TRUE;
3156 }
PreparePageItem()3157 FX_BOOL CPDF_DataAvail::PreparePageItem()
3158 {
3159 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
3160 CPDF_Reference* pRef = (CPDF_Reference*)pRoot->GetElement(FX_BSTRC("Pages"));
3161 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3162 m_docStatus = PDF_DATAAVAIL_ERROR;
3163 return FALSE;
3164 }
3165 m_PagesObjNum = pRef->GetRefObjNum();
3166 m_pCurrentParser = (CPDF_Parser *)m_pDocument->GetParser();
3167 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3168 return TRUE;
3169 }
IsFirstCheck(int iPage)3170 FX_BOOL CPDF_DataAvail::IsFirstCheck(int iPage)
3171 {
3172 if (NULL == m_pageMapCheckState) {
3173 m_pageMapCheckState = FX_NEW CFX_CMapDWordToDWord();
3174 }
3175 FX_DWORD dwValue = 0;
3176 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3177 m_pageMapCheckState->SetAt(iPage, 1);
3178 return TRUE;
3179 }
3180 if (dwValue != 0) {
3181 return FALSE;
3182 }
3183 m_pageMapCheckState->SetAt(iPage, 1);
3184 return TRUE;
3185 }
ResetFirstCheck(int iPage)3186 void CPDF_DataAvail::ResetFirstCheck(int iPage)
3187 {
3188 if (NULL == m_pageMapCheckState) {
3189 m_pageMapCheckState = FX_NEW CFX_CMapDWordToDWord();
3190 }
3191 FX_DWORD dwValue = 1;
3192 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3193 return;
3194 }
3195 m_pageMapCheckState->SetAt(iPage, 0);
3196 }
CheckPage(IFX_DownloadHints * pHints)3197 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints)
3198 {
3199 FX_DWORD i = 0;
3200 FX_DWORD iLen = m_PageObjList.GetSize();
3201 CFX_DWordArray UnavailObjList;
3202 for (; i < iLen; ++i) {
3203 FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i);
3204 FX_BOOL bExist = FALSE;
3205 CPDF_Object *pObj = GetObject(dwPageObjNum, pHints, &bExist);
3206 if (!pObj) {
3207 if (bExist) {
3208 UnavailObjList.Add(dwPageObjNum);
3209 }
3210 continue;
3211 }
3212 if (pObj->GetType() == PDFOBJ_ARRAY) {
3213 CPDF_Array *pArray = pObj->GetArray();
3214 if (pArray) {
3215 FX_INT32 iSize = pArray->GetCount();
3216 CPDF_Object *pItem = NULL;
3217 for (FX_INT32 j = 0; j < iSize; ++j) {
3218 pItem = pArray->GetElement(j);
3219 if (pItem && pItem->GetType() == PDFOBJ_REFERENCE) {
3220 UnavailObjList.Add(((CPDF_Reference *)pItem)->GetRefObjNum());
3221 }
3222 }
3223 }
3224 }
3225 if (pObj->GetType() != PDFOBJ_DICTIONARY) {
3226 pObj->Release();
3227 continue;
3228 }
3229 CFX_ByteString type = pObj->GetDict()->GetString(FX_BSTRC("Type"));
3230 if (type == FX_BSTRC("Pages")) {
3231 m_PagesArray.Add(pObj);
3232 continue;
3233 }
3234 pObj->Release();
3235 }
3236 m_PageObjList.RemoveAll();
3237 if (UnavailObjList.GetSize()) {
3238 m_PageObjList.Append(UnavailObjList);
3239 return FALSE;
3240 }
3241 i = 0;
3242 iLen = m_PagesArray.GetSize();
3243 for (; i < iLen; ++i) {
3244 CPDF_Object *pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3245 if (!pPages) {
3246 continue;
3247 }
3248 if (!GetPageKids(m_pCurrentParser, pPages)) {
3249 pPages->Release();
3250 while (i++ < iLen) {
3251 pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3252 pPages->Release();
3253 }
3254 m_PagesArray.RemoveAll();
3255 m_docStatus = PDF_DATAAVAIL_ERROR;
3256 return FALSE;
3257 }
3258 pPages->Release();
3259 }
3260 m_PagesArray.RemoveAll();
3261 if (!m_PageObjList.GetSize()) {
3262 m_docStatus = PDF_DATAAVAIL_DONE;
3263 return TRUE;
3264 }
3265 return TRUE;
3266 }
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)3267 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser *pParser, CPDF_Object *pPages)
3268 {
3269 if (!pParser) {
3270 m_docStatus = PDF_DATAAVAIL_ERROR;
3271 return FALSE;
3272 }
3273 CPDF_Object *pKids = pPages->GetDict()->GetElement(FX_BSTRC("Kids"));
3274 if (!pKids) {
3275 return TRUE;
3276 }
3277 switch (pKids->GetType()) {
3278 case PDFOBJ_REFERENCE: {
3279 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
3280 m_PageObjList.Add(pKid->GetRefObjNum());
3281 }
3282 break;
3283 case PDFOBJ_ARRAY: {
3284 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
3285 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3286 CPDF_Reference *pKid = (CPDF_Reference *)pKidsArray->GetElement(i);
3287 m_PageObjList.Add(pKid->GetRefObjNum());
3288 }
3289 }
3290 break;
3291 default:
3292 m_docStatus = PDF_DATAAVAIL_ERROR;
3293 return FALSE;
3294 }
3295 return TRUE;
3296 }
CheckPages(IFX_DownloadHints * pHints)3297 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints)
3298 {
3299 FX_BOOL bExist = FALSE;
3300 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
3301 if (!bExist) {
3302 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3303 return TRUE;
3304 }
3305 if (!pPages) {
3306 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3307 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3308 return TRUE;
3309 }
3310 return FALSE;
3311 }
3312 FX_BOOL bNeedLoad = FALSE;
3313 if (!GetPageKids(m_pCurrentParser, pPages)) {
3314 pPages->Release();
3315 m_docStatus = PDF_DATAAVAIL_ERROR;
3316 return FALSE;
3317 }
3318 pPages->Release();
3319 m_docStatus = PDF_DATAAVAIL_PAGE;
3320 return TRUE;
3321 }
CheckHeader(IFX_DownloadHints * pHints)3322 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints)
3323 {
3324 FX_DWORD req_size = 1024;
3325 if ((FX_FILESIZE)req_size > m_dwFileLen) {
3326 req_size = (FX_DWORD)m_dwFileLen;
3327 }
3328 if (m_pFileAvail->IsDataAvail(0, req_size)) {
3329 FX_BYTE buffer[1024];
3330 m_pFileRead->ReadBlock(buffer, 0, req_size);
3331 if (IsLinearizedFile(buffer, req_size)) {
3332 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
3333 } else {
3334 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3335 return FALSE;
3336 }
3337 m_docStatus = PDF_DATAAVAIL_END;
3338 }
3339 return TRUE;
3340 }
3341 pHints->AddSegment(0, req_size);
3342 return FALSE;
3343 }
CheckFirstPage(IFX_DownloadHints * pHints)3344 FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints *pHints)
3345 {
3346 FX_DWORD dwFirstPageEndOffset = 0;
3347 CPDF_Object *pEndOffSet = m_pLinearized->GetDict()->GetElement(FX_BSTRC("E"));
3348 if (!pEndOffSet) {
3349 m_docStatus = PDF_DATAAVAIL_ERROR;
3350 return FALSE;
3351 }
3352 CPDF_Object *pXRefOffset = m_pLinearized->GetDict()->GetElement(FX_BSTRC("T"));
3353 if (!pXRefOffset) {
3354 m_docStatus = PDF_DATAAVAIL_ERROR;
3355 return FALSE;
3356 }
3357 CPDF_Object *pFileLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
3358 if (!pFileLen) {
3359 m_docStatus = PDF_DATAAVAIL_ERROR;
3360 return FALSE;
3361 }
3362 FX_BOOL bNeedDownLoad = FALSE;
3363 if (pEndOffSet->GetType() == PDFOBJ_NUMBER) {
3364 FX_DWORD dwEnd = pEndOffSet->GetInteger();
3365 dwEnd += 512;
3366 if ((FX_FILESIZE)dwEnd > m_dwFileLen) {
3367 dwEnd = (FX_DWORD)m_dwFileLen;
3368 }
3369 FX_INT32 iStartPos = (FX_INT32)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
3370 FX_INT32 iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
3371 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
3372 pHints->AddSegment(iStartPos, iSize);
3373 bNeedDownLoad = TRUE;
3374 }
3375 }
3376 m_dwLastXRefOffset = 0;
3377 FX_FILESIZE dwFileLen = 0;
3378 if (pXRefOffset->GetType() == PDFOBJ_NUMBER) {
3379 m_dwLastXRefOffset = pXRefOffset->GetInteger();
3380 }
3381 if (pFileLen->GetType() == PDFOBJ_NUMBER) {
3382 dwFileLen = pFileLen->GetInteger();
3383 }
3384 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) {
3385 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
3386 FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset);
3387 FX_FILESIZE offset = m_dwLastXRefOffset;
3388 if (dwSize < 512 && dwFileLen > 512) {
3389 dwSize = 512;
3390 offset = dwFileLen - 512;
3391 }
3392 pHints->AddSegment(offset, dwSize);
3393 }
3394 } else {
3395 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3396 }
3397 if (!bNeedDownLoad && m_docStatus == PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
3398 m_docStatus = PDF_DATAAVAIL_DONE;
3399 return TRUE;
3400 }
3401 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3402 return FALSE;
3403 }
ParseIndirectObjectAt(FX_FILESIZE pos,FX_DWORD objnum)3404 CPDF_Object * CPDF_DataAvail::ParseIndirectObjectAt(FX_FILESIZE pos, FX_DWORD objnum)
3405 {
3406 FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
3407 m_syntaxParser.RestorePos(pos);
3408 FX_BOOL bIsNumber;
3409 CFX_ByteString word = m_syntaxParser.GetNextWord(bIsNumber);
3410 if (!bIsNumber) {
3411 return NULL;
3412 }
3413 FX_DWORD real_objnum = FXSYS_atoi(word);
3414 if (objnum && real_objnum != objnum) {
3415 return NULL;
3416 }
3417 word = m_syntaxParser.GetNextWord(bIsNumber);
3418 if (!bIsNumber) {
3419 return NULL;
3420 }
3421 FX_DWORD gennum = FXSYS_atoi(word);
3422 if (m_syntaxParser.GetKeyword() != FX_BSTRC("obj")) {
3423 m_syntaxParser.RestorePos(SavedPos);
3424 return NULL;
3425 }
3426 CPDF_Object* pObj = m_syntaxParser.GetObject(NULL, objnum, gennum, 0);
3427 m_syntaxParser.RestorePos(SavedPos);
3428 return pObj;
3429 }
IsLinearizedPDF()3430 FX_INT32 CPDF_DataAvail::IsLinearizedPDF()
3431 {
3432 FX_DWORD req_size = 1024;
3433 if (!m_pFileAvail->IsDataAvail(0, req_size)) {
3434 return PDF_UNKNOW_LINEARIZED;
3435 }
3436 if (!m_pFileRead) {
3437 return PDF_NOT_LINEARIZED;
3438 }
3439 FX_FILESIZE dwSize = m_pFileRead->GetSize();
3440 if (dwSize < (FX_FILESIZE)req_size) {
3441 return PDF_UNKNOW_LINEARIZED;
3442 }
3443 FX_BYTE buffer[1024];
3444 m_pFileRead->ReadBlock(buffer, 0, req_size);
3445 if (IsLinearizedFile(buffer, req_size)) {
3446 return PDF_IS_LINEARIZED;
3447 }
3448 return PDF_NOT_LINEARIZED;
3449 }
IsLinearizedFile(FX_LPBYTE pData,FX_DWORD dwLen)3450 FX_BOOL CPDF_DataAvail::IsLinearizedFile(FX_LPBYTE pData, FX_DWORD dwLen)
3451 {
3452 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
3453 FX_INT32 offset = GetHeaderOffset((IFX_FileStream*)file);
3454 if (offset == -1) {
3455 m_docStatus = PDF_DATAAVAIL_ERROR;
3456 return FALSE;
3457 }
3458 m_dwHeaderOffset = offset;
3459 m_syntaxParser.InitParser((IFX_FileStream*)file, offset);
3460 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
3461 FX_BOOL bNumber = FALSE;
3462 FX_FILESIZE dwSavePos = m_syntaxParser.SavePos();
3463 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(bNumber);
3464 if (!bNumber) {
3465 return FALSE;
3466 }
3467 FX_DWORD objnum = FXSYS_atoi(wordObjNum);
3468 if (m_pLinearized) {
3469 m_pLinearized->Release();
3470 m_pLinearized = NULL;
3471 }
3472 m_pLinearized = ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
3473 if (!m_pLinearized) {
3474 return FALSE;
3475 }
3476 if (m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
3477 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
3478 if (!pLen) {
3479 return FALSE;
3480 }
3481 if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) {
3482 return FALSE;
3483 }
3484 m_bLinearized = TRUE;
3485 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
3486 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
3487 m_dwFirstPageNo = pNo->GetInteger();
3488 }
3489 return TRUE;
3490 }
3491 return FALSE;
3492 }
CheckEnd(IFX_DownloadHints * pHints)3493 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints)
3494 {
3495 FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
3496 FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
3497 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
3498 FX_BYTE buffer[1024];
3499 m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
3500 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
3501 m_syntaxParser.InitParser((IFX_FileStream*)file, 0);
3502 m_syntaxParser.RestorePos(dwSize - 1);
3503 if (m_syntaxParser.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, dwSize)) {
3504 FX_BOOL bNumber;
3505 m_syntaxParser.GetNextWord(bNumber);
3506 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(bNumber);
3507 if (!bNumber) {
3508 m_docStatus = PDF_DATAAVAIL_ERROR;
3509 return FALSE;
3510 }
3511 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
3512 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
3513 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3514 return TRUE;
3515 }
3516 m_dwLastXRefOffset = m_dwXRefOffset;
3517 SetStartOffset(m_dwXRefOffset);
3518 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3519 return TRUE;
3520 } else {
3521 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3522 return TRUE;
3523 }
3524 }
3525 pHints->AddSegment(req_pos, dwSize);
3526 return FALSE;
3527 }
CheckCrossRefStream(IFX_DownloadHints * pHints,FX_FILESIZE & xref_offset)3528 FX_DWORD CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, FX_FILESIZE &xref_offset)
3529 {
3530 xref_offset = 0;
3531 FX_DWORD req_size = (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3532 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
3533 FX_INT32 iSize = (FX_INT32)(m_Pos + req_size - m_dwCurrentXRefSteam);
3534 CFX_BinaryBuf buf(iSize);
3535 FX_LPBYTE pBuf = buf.GetBuffer();
3536 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3537 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3538 m_parser.m_Syntax.InitParser((IFX_FileStream*)file, 0);
3539 FX_BOOL bNumber = FALSE;
3540 FX_FILESIZE dwSavePos = m_parser.m_Syntax.SavePos();
3541 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(bNumber);
3542 if (!bNumber) {
3543 return -1;
3544 }
3545 FX_DWORD objNum = FXSYS_atoi(objnum);
3546 CPDF_Object *pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL);
3547 if (!pObj) {
3548 m_Pos += m_parser.m_Syntax.SavePos();
3549 return 0;
3550 }
3551 CPDF_Object *pName = pObj->GetDict()->GetElement(FX_BSTRC("Type"));
3552 if (pName && pName->GetType() == PDFOBJ_NAME) {
3553 if (pName->GetString() == FX_BSTRC("XRef")) {
3554 m_Pos += m_parser.m_Syntax.SavePos();
3555 xref_offset = pObj->GetDict()->GetInteger(FX_BSTRC("Prev"));
3556 pObj->Release();
3557 return 1;
3558 } else {
3559 pObj->Release();
3560 return -1;
3561 }
3562 }
3563 pObj->Release();
3564 return -1;
3565 }
3566 pHints->AddSegment(m_Pos, req_size);
3567 return 0;
3568 }
SetStartOffset(FX_FILESIZE dwOffset)3569 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset)
3570 {
3571 m_Pos = dwOffset;
3572 }
3573 #define MAX_WORD_BUFFER 256
GetNextToken(CFX_ByteString & token)3574 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token)
3575 {
3576 m_WordSize = 0;
3577 FX_BYTE ch;
3578 if (!GetNextChar(ch)) {
3579 return FALSE;
3580 }
3581 FX_BYTE type = _PDF_CharType[ch];
3582 while (1) {
3583 while (type == 'W') {
3584 if (!GetNextChar(ch)) {
3585 return FALSE;
3586 }
3587 type = _PDF_CharType[ch];
3588 }
3589 if (ch != '%') {
3590 break;
3591 }
3592 while (1) {
3593 if (!GetNextChar(ch)) {
3594 return FALSE;
3595 }
3596 if (ch == '\r' || ch == '\n') {
3597 break;
3598 }
3599 }
3600 type = _PDF_CharType[ch];
3601 }
3602 if (type == 'D') {
3603 m_WordBuffer[m_WordSize++] = ch;
3604 if (ch == '/') {
3605 while (1) {
3606 if (!GetNextChar(ch)) {
3607 return FALSE;
3608 }
3609 type = _PDF_CharType[ch];
3610 if (type != 'R' && type != 'N') {
3611 m_Pos --;
3612 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3613 token = ret;
3614 return TRUE;
3615 }
3616 if (m_WordSize < MAX_WORD_BUFFER) {
3617 m_WordBuffer[m_WordSize++] = ch;
3618 }
3619 }
3620 } else if (ch == '<') {
3621 if (!GetNextChar(ch)) {
3622 return FALSE;
3623 }
3624 if (ch == '<') {
3625 m_WordBuffer[m_WordSize++] = ch;
3626 } else {
3627 m_Pos --;
3628 }
3629 } else if (ch == '>') {
3630 if (!GetNextChar(ch)) {
3631 return FALSE;
3632 }
3633 if (ch == '>') {
3634 m_WordBuffer[m_WordSize++] = ch;
3635 } else {
3636 m_Pos --;
3637 }
3638 }
3639 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3640 token = ret;
3641 return TRUE;
3642 }
3643 while (1) {
3644 if (m_WordSize < MAX_WORD_BUFFER) {
3645 m_WordBuffer[m_WordSize++] = ch;
3646 }
3647 if (!GetNextChar(ch)) {
3648 return FALSE;
3649 }
3650 type = _PDF_CharType[ch];
3651 if (type == 'D' || type == 'W') {
3652 m_Pos --;
3653 break;
3654 }
3655 }
3656 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3657 token = ret;
3658 return TRUE;
3659 }
GetNextChar(FX_BYTE & ch)3660 FX_BOOL CPDF_DataAvail::GetNextChar(FX_BYTE &ch)
3661 {
3662 FX_FILESIZE pos = m_Pos;
3663 if (pos >= m_dwFileLen) {
3664 return FALSE;
3665 }
3666 if (m_bufferOffset >= pos || (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
3667 FX_FILESIZE read_pos = pos;
3668 FX_DWORD read_size = 512;
3669 if ((FX_FILESIZE)read_size > m_dwFileLen) {
3670 read_size = (FX_DWORD)m_dwFileLen;
3671 }
3672 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) {
3673 read_pos = m_dwFileLen - read_size;
3674 }
3675 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) {
3676 return FALSE;
3677 }
3678 m_bufferOffset = read_pos;
3679 m_bufferSize = read_size;
3680 }
3681 ch = m_bufferData[pos - m_bufferOffset];
3682 m_Pos ++;
3683 return TRUE;
3684 }
CheckCrossRefItem(IFX_DownloadHints * pHints)3685 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints *pHints)
3686 {
3687 FX_INT32 iSize = 0;
3688 CFX_ByteString token;
3689 while (1) {
3690 if (!GetNextToken(token)) {
3691 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3692 pHints->AddSegment(m_Pos, iSize);
3693 return FALSE;
3694 }
3695 if (token == "trailer") {
3696 m_dwTrailerOffset = m_Pos;
3697 m_docStatus = PDF_DATAAVAIL_TRAILER;
3698 return TRUE;
3699 }
3700 }
3701 }
CheckAllCrossRefStream(IFX_DownloadHints * pHints)3702 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints *pHints)
3703 {
3704 FX_FILESIZE xref_offset = 0;
3705 FX_DWORD dwRet = CheckCrossRefStream(pHints, xref_offset);
3706 if (dwRet == 1) {
3707 if (!xref_offset) {
3708 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
3709 } else {
3710 m_dwCurrentXRefSteam = xref_offset;
3711 m_Pos = xref_offset;
3712 }
3713 return TRUE;
3714 } else if (dwRet == -1) {
3715 m_docStatus = PDF_DATAAVAIL_ERROR;
3716 }
3717 return FALSE;
3718 }
CheckCrossRef(IFX_DownloadHints * pHints)3719 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints)
3720 {
3721 FX_FILESIZE dwSavePos = m_Pos;
3722 FX_INT32 iSize = 0;
3723 CFX_ByteString token;
3724 if (!GetNextToken(token)) {
3725 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3726 pHints->AddSegment(m_Pos, iSize);
3727 return FALSE;
3728 }
3729 if (token == "xref") {
3730 m_CrossOffset.InsertAt(0, m_dwXRefOffset);
3731 while (1) {
3732 if (!GetNextToken(token)) {
3733 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3734 pHints->AddSegment(m_Pos, iSize);
3735 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
3736 return FALSE;
3737 }
3738 if (token == "trailer") {
3739 m_dwTrailerOffset = m_Pos;
3740 m_docStatus = PDF_DATAAVAIL_TRAILER;
3741 return TRUE;
3742 }
3743 }
3744 } else {
3745 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3746 return TRUE;
3747 }
3748 return FALSE;
3749 }
CheckTrailerAppend(IFX_DownloadHints * pHints)3750 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints)
3751 {
3752 if (m_Pos < m_dwFileLen) {
3753 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
3754 FX_INT32 iSize = (FX_INT32)(dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
3755 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
3756 pHints->AddSegment(dwAppendPos, iSize);
3757 return FALSE;
3758 }
3759 }
3760 if (m_dwPrevXRefOffset) {
3761 SetStartOffset(m_dwPrevXRefOffset);
3762 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3763 } else {
3764 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
3765 }
3766 return TRUE;
3767 }
CheckTrailer(IFX_DownloadHints * pHints)3768 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints)
3769 {
3770 FX_INT32 iTrailerSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3771 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
3772 FX_INT32 iSize = (FX_INT32)(m_Pos + iTrailerSize - m_dwTrailerOffset);
3773 CFX_BinaryBuf buf(iSize);
3774 FX_LPBYTE pBuf = buf.GetBuffer();
3775 if (!pBuf) {
3776 m_docStatus = PDF_DATAAVAIL_ERROR;
3777 return FALSE;
3778 }
3779 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) {
3780 return FALSE;
3781 }
3782 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3783 m_syntaxParser.InitParser((IFX_FileStream*)file, 0);
3784 CPDF_Object *pTrailer = m_syntaxParser.GetObject(NULL, 0, 0, 0);
3785 if (!pTrailer) {
3786 m_Pos += m_syntaxParser.SavePos();
3787 pHints->AddSegment(m_Pos, iTrailerSize);
3788 return FALSE;
3789 }
3790 CPDF_Dictionary *pTrailerDict = pTrailer->GetDict();
3791 if (pTrailerDict) {
3792 CPDF_Object *pEncrypt = pTrailerDict->GetElement("Encrypt");
3793 if (pEncrypt && pEncrypt->GetType() == PDFOBJ_REFERENCE) {
3794 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3795 pTrailer->Release();
3796 return TRUE;
3797 }
3798 }
3799 FX_DWORD xrefpos = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("Prev"));
3800 if (xrefpos) {
3801 m_dwPrevXRefOffset = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("XRefStm"));
3802 pTrailer->Release();
3803 if (m_dwPrevXRefOffset) {
3804 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3805 } else {
3806 m_dwPrevXRefOffset = xrefpos;
3807 if (m_dwPrevXRefOffset >= m_dwFileLen) {
3808 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3809 } else {
3810 SetStartOffset(m_dwPrevXRefOffset);
3811 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
3812 }
3813 }
3814 return TRUE;
3815 } else {
3816 m_dwPrevXRefOffset = 0;
3817 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
3818 pTrailer->Release();
3819 }
3820 return TRUE;
3821 }
3822 pHints->AddSegment(m_Pos, iTrailerSize);
3823 return FALSE;
3824 }
CheckPage(FX_INT32 iPage,IFX_DownloadHints * pHints)3825 FX_BOOL CPDF_DataAvail::CheckPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
3826 {
3827 while (TRUE) {
3828 switch (m_docStatus) {
3829 case PDF_DATAAVAIL_PAGETREE:
3830 if (!LoadDocPages(pHints)) {
3831 return FALSE;
3832 }
3833 break;
3834 case PDF_DATAAVAIL_PAGE:
3835 if (!LoadDocPage(iPage, pHints)) {
3836 return FALSE;
3837 }
3838 break;
3839 case PDF_DATAAVAIL_ERROR:
3840 return LoadAllFile(pHints);
3841 default:
3842 m_bPagesTreeLoad = TRUE;
3843 m_bPagesLoad = TRUE;
3844 m_bCurPageDictLoadOK = TRUE;
3845 m_docStatus = PDF_DATAAVAIL_PAGE;
3846 return TRUE;
3847 }
3848 }
3849 }
CheckArrayPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)3850 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
3851 {
3852 FX_BOOL bExist = FALSE;
3853 CPDF_Object *pPages = GetObject(dwPageNo, pHints, &bExist);
3854 if (!bExist) {
3855 m_docStatus = PDF_DATAAVAIL_ERROR;
3856 return FALSE;
3857 }
3858 if (!pPages) {
3859 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3860 m_docStatus = PDF_DATAAVAIL_ERROR;
3861 return FALSE;
3862 }
3863 return FALSE;
3864 }
3865 if (pPages->GetType() != PDFOBJ_ARRAY) {
3866 pPages->Release();
3867 m_docStatus = PDF_DATAAVAIL_ERROR;
3868 return FALSE;
3869 }
3870 pPageNode->m_type = PDF_PAGENODE_PAGES;
3871 CPDF_Array* pArray = (CPDF_Array*)pPages;
3872 for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
3873 CPDF_Object *pKid = (CPDF_Object *)pArray->GetElement(i);
3874 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
3875 continue;
3876 }
3877 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3878 pPageNode->m_childNode.Add(pNode);
3879 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
3880 }
3881 pPages->Release();
3882 return TRUE;
3883 }
CheckUnkownPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)3884 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
3885 {
3886 FX_BOOL bExist = FALSE;
3887 CPDF_Object *pPage = GetObject(dwPageNo, pHints, &bExist);
3888 if (!bExist) {
3889 m_docStatus = PDF_DATAAVAIL_ERROR;
3890 return FALSE;
3891 }
3892 if (!pPage) {
3893 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3894 m_docStatus = PDF_DATAAVAIL_ERROR;
3895 return FALSE;
3896 }
3897 return FALSE;
3898 }
3899 if (pPage->GetType() == PDFOBJ_ARRAY) {
3900 pPageNode->m_dwPageNo = dwPageNo;
3901 pPageNode->m_type = PDF_PAGENODE_ARRAY;
3902 pPage->Release();
3903 return TRUE;
3904 }
3905 if (pPage->GetType() != PDFOBJ_DICTIONARY) {
3906 pPage->Release();
3907 m_docStatus = PDF_DATAAVAIL_ERROR;
3908 return FALSE;
3909 }
3910 pPageNode->m_dwPageNo = dwPageNo;
3911 CFX_ByteString type = pPage->GetDict()->GetString(FX_BSTRC("Type"));
3912 if (type == FX_BSTRC("Pages")) {
3913 pPageNode->m_type = PDF_PAGENODE_PAGES;
3914 CPDF_Object *pKids = pPage->GetDict()->GetElement(FX_BSTRC("Kids"));
3915 if (!pKids) {
3916 m_docStatus = PDF_DATAAVAIL_PAGE;
3917 return TRUE;
3918 }
3919 switch (pKids->GetType()) {
3920 case PDFOBJ_REFERENCE: {
3921 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
3922 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3923 pPageNode->m_childNode.Add(pNode);
3924 pNode->m_dwPageNo = pKid->GetRefObjNum();
3925 }
3926 break;
3927 case PDFOBJ_ARRAY: {
3928 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
3929 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3930 CPDF_Object *pKid = (CPDF_Object *)pKidsArray->GetElement(i);
3931 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
3932 continue;
3933 }
3934 CPDF_PageNode *pNode = FX_NEW CPDF_PageNode();
3935 pPageNode->m_childNode.Add(pNode);
3936 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
3937 }
3938 }
3939 break;
3940 default:
3941 break;
3942 }
3943 } else if (type == FX_BSTRC("Page")) {
3944 pPageNode->m_type = PDF_PAGENODE_PAGE;
3945 } else {
3946 pPage->Release();
3947 m_docStatus = PDF_DATAAVAIL_ERROR;
3948 return FALSE;
3949 }
3950 pPage->Release();
3951 return TRUE;
3952 }
CheckPageNode(CPDF_PageNode & pageNodes,FX_INT32 iPage,FX_INT32 & iCount,IFX_DownloadHints * pHints)3953 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode &pageNodes, FX_INT32 iPage, FX_INT32 &iCount, IFX_DownloadHints* pHints)
3954 {
3955 FX_INT32 iSize = pageNodes.m_childNode.GetSize();
3956 if (!iSize) {
3957 m_docStatus = PDF_DATAAVAIL_ERROR;
3958 return FALSE;
3959 }
3960 for (FX_INT32 i = 0; i < iSize; ++i) {
3961 CPDF_PageNode *pNode = (CPDF_PageNode*)pageNodes.m_childNode.GetAt(i);
3962 if (!pNode) {
3963 continue;
3964 }
3965 switch (pNode->m_type) {
3966 case PDF_PAGENODE_UNKOWN:
3967 if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) {
3968 return FALSE;
3969 }
3970 --i;
3971 break;
3972 case PDF_PAGENODE_PAGE:
3973 iCount++;
3974 if (iPage == iCount && m_pDocument) {
3975 m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
3976 }
3977 break;
3978 case PDF_PAGENODE_PAGES:
3979 if (!CheckPageNode(*pNode, iPage, iCount, pHints)) {
3980 return FALSE;
3981 }
3982 break;
3983 case PDF_PAGENODE_ARRAY:
3984 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) {
3985 return FALSE;
3986 }
3987 --i;
3988 break;
3989 }
3990 if (iPage == iCount) {
3991 m_docStatus = PDF_DATAAVAIL_DONE;
3992 return TRUE;
3993 }
3994 }
3995 return TRUE;
3996 }
LoadDocPage(FX_INT32 iPage,IFX_DownloadHints * pHints)3997 FX_BOOL CPDF_DataAvail::LoadDocPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
3998 {
3999 if (m_pDocument->GetPageCount() <= iPage || m_pDocument->m_PageList.GetAt(iPage)) {
4000 m_docStatus = PDF_DATAAVAIL_DONE;
4001 return TRUE;
4002 }
4003 if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
4004 if (iPage == 0) {
4005 m_docStatus = PDF_DATAAVAIL_DONE;
4006 return TRUE;
4007 }
4008 m_docStatus = PDF_DATAAVAIL_ERROR;
4009 return TRUE;
4010 }
4011 FX_INT32 iCount = -1;
4012 return CheckPageNode(m_pageNodes, iPage, iCount, pHints);
4013 }
CheckPageCount(IFX_DownloadHints * pHints)4014 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints)
4015 {
4016 FX_BOOL bExist = FALSE;
4017 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
4018 if (!bExist) {
4019 m_docStatus = PDF_DATAAVAIL_ERROR;
4020 return FALSE;
4021 }
4022 if (!pPages) {
4023 return FALSE;
4024 }
4025 CPDF_Dictionary* pPagesDict = pPages->GetDict();
4026 if (!pPagesDict) {
4027 pPages->Release();
4028 m_docStatus = PDF_DATAAVAIL_ERROR;
4029 return FALSE;
4030 }
4031 if (!pPagesDict->KeyExist(FX_BSTRC("Kids"))) {
4032 pPages->Release();
4033 return TRUE;
4034 }
4035 int count = pPagesDict->GetInteger(FX_BSTRC("Count"));
4036 if (count > 0) {
4037 pPages->Release();
4038 return TRUE;
4039 }
4040 pPages->Release();
4041 return FALSE;
4042 }
LoadDocPages(IFX_DownloadHints * pHints)4043 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints)
4044 {
4045 if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) {
4046 return FALSE;
4047 }
4048 if (CheckPageCount(pHints)) {
4049 m_docStatus = PDF_DATAAVAIL_PAGE;
4050 return TRUE;
4051 } else {
4052 m_bTotalLoadPageTree = TRUE;
4053 }
4054 return FALSE;
4055 }
LoadPages(IFX_DownloadHints * pHints)4056 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints)
4057 {
4058 while (!m_bPagesTreeLoad) {
4059 if (!CheckPageStatus(pHints)) {
4060 return FALSE;
4061 }
4062 }
4063 if (m_bPagesLoad) {
4064 return TRUE;
4065 }
4066 m_pDocument->LoadPages();
4067 return FALSE;
4068 }
CheckLinearizedData(IFX_DownloadHints * pHints)4069 FX_BOOL CPDF_DataAvail::CheckLinearizedData(IFX_DownloadHints* pHints)
4070 {
4071 if (m_bLinearedDataOK) {
4072 return TRUE;
4073 }
4074 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset))) {
4075 pHints->AddSegment(m_dwLastXRefOffset, (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset));
4076 return FALSE;
4077 }
4078 FX_DWORD dwRet = 0;
4079 if (!m_bMainXRefLoad) {
4080 dwRet = ((CPDF_Parser *)m_pDocument->GetParser())->LoadLinearizedMainXRefTable();
4081 if (dwRet == PDFPARSE_ERROR_SUCCESS) {
4082 if (!PreparePageItem()) {
4083 return FALSE;
4084 }
4085 m_bMainXRefLoadedOK = TRUE;
4086 }
4087 m_bMainXRefLoad = TRUE;
4088 }
4089 m_bLinearedDataOK = TRUE;
4090 return TRUE;
4091 }
CheckPageAnnots(FX_INT32 iPage,IFX_DownloadHints * pHints)4092 FX_BOOL CPDF_DataAvail::CheckPageAnnots(FX_INT32 iPage, IFX_DownloadHints* pHints)
4093 {
4094 if (!m_objs_array.GetSize()) {
4095 m_objs_array.RemoveAll();
4096 m_objnum_array.RemoveAll();
4097 CPDF_Dictionary *pPageDict = m_pDocument->GetPage(iPage);
4098 if (!pPageDict) {
4099 return TRUE;
4100 }
4101 CPDF_Object *pAnnots = pPageDict->GetElement(FX_BSTRC("Annots"));
4102 if (!pAnnots) {
4103 return TRUE;
4104 }
4105 CFX_PtrArray obj_array;
4106 obj_array.Add(pAnnots);
4107 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
4108 if (bRet) {
4109 m_objs_array.RemoveAll();
4110 }
4111 return bRet;
4112 } else {
4113 CFX_PtrArray new_objs_array;
4114 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4115 m_objs_array.RemoveAll();
4116 if (!bRet) {
4117 m_objs_array.Append(new_objs_array);
4118 }
4119 return bRet;
4120 }
4121 }
CheckLinearizedFirstPage(FX_INT32 iPage,IFX_DownloadHints * pHints)4122 FX_BOOL CPDF_DataAvail::CheckLinearizedFirstPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
4123 {
4124 if (!m_bAnnotsLoad) {
4125 if (!CheckPageAnnots(iPage, pHints)) {
4126 return FALSE;
4127 }
4128 m_bAnnotsLoad = TRUE;
4129 }
4130 if (m_bAnnotsLoad)
4131 if (!CheckLinearizedData(pHints)) {
4132 return FALSE;
4133 }
4134 m_bPageLoadedOK = FALSE;
4135 return TRUE;
4136 }
HaveResourceAncestor(CPDF_Dictionary * pDict)4137 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary *pDict)
4138 {
4139 CPDF_Object *pParent = pDict->GetElement("Parent");
4140 if (!pParent) {
4141 return FALSE;
4142 }
4143 CPDF_Dictionary *pParentDict = pParent->GetDict();
4144 if (!pParentDict) {
4145 return FALSE;
4146 }
4147 CPDF_Object *pRet = pParentDict->GetElement("Resource");
4148 if (pRet) {
4149 m_pPageResource = pRet;
4150 return TRUE;
4151 } else {
4152 return HaveResourceAncestor(pParentDict);
4153 }
4154 }
IsPageAvail(FX_INT32 iPage,IFX_DownloadHints * pHints)4155 FX_BOOL CPDF_DataAvail::IsPageAvail(FX_INT32 iPage, IFX_DownloadHints* pHints)
4156 {
4157 if (!m_pDocument) {
4158 return FALSE;
4159 }
4160 if (IsFirstCheck(iPage)) {
4161 m_bCurPageDictLoadOK = FALSE;
4162 m_bPageLoadedOK = FALSE;
4163 m_bAnnotsLoad = FALSE;
4164 m_bNeedDownLoadResource = FALSE;
4165 m_objs_array.RemoveAll();
4166 m_objnum_array.RemoveAll();
4167 }
4168 if (m_pagesLoadState == NULL) {
4169 m_pagesLoadState = FX_NEW CFX_CMapDWordToDWord();
4170 }
4171 FX_DWORD dwPageLoad = 0;
4172 if (m_pagesLoadState->Lookup(iPage, dwPageLoad) && dwPageLoad != 0) {
4173 return TRUE;
4174 }
4175 if (m_bLinearized) {
4176 if ((FX_DWORD)iPage == m_dwFirstPageNo) {
4177 m_pagesLoadState->SetAt(iPage, TRUE);
4178 return TRUE;
4179 }
4180 if (!CheckLinearizedData(pHints)) {
4181 return FALSE;
4182 }
4183 if (m_bMainXRefLoadedOK) {
4184 if (m_bTotalLoadPageTree) {
4185 if (!LoadPages(pHints)) {
4186 return FALSE;
4187 }
4188 } else {
4189 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4190 return FALSE;
4191 }
4192 }
4193 } else {
4194 if (!LoadAllFile(pHints)) {
4195 return FALSE;
4196 }
4197 ((CPDF_Parser *)m_pDocument->GetParser())->RebuildCrossRef();
4198 ResetFirstCheck(iPage);
4199 return TRUE;
4200 }
4201 } else {
4202 if (!m_bTotalLoadPageTree) {
4203 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4204 return FALSE;
4205 }
4206 }
4207 }
4208 if (m_bHaveAcroForm && !m_bAcroFormLoad) {
4209 if (!CheckAcroFormSubObject(pHints)) {
4210 return FALSE;
4211 }
4212 m_bAcroFormLoad = TRUE;
4213 }
4214 if (!m_bPageLoadedOK) {
4215 if (!m_objs_array.GetSize()) {
4216 m_objs_array.RemoveAll();
4217 m_objnum_array.RemoveAll();
4218 m_pPageDict = m_pDocument->GetPage(iPage);
4219 if (!m_pPageDict) {
4220 ResetFirstCheck(iPage);
4221 return TRUE;
4222 }
4223 CFX_PtrArray obj_array;
4224 obj_array.Add(m_pPageDict);
4225 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4226 if (bRet) {
4227 m_objs_array.RemoveAll();
4228 m_bPageLoadedOK = TRUE;
4229 } else {
4230 return bRet;
4231 }
4232 } else {
4233 CFX_PtrArray new_objs_array;
4234 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4235 m_objs_array.RemoveAll();
4236 if (bRet) {
4237 m_bPageLoadedOK = TRUE;
4238 } else {
4239 m_objs_array.Append(new_objs_array);
4240 return bRet;
4241 }
4242 }
4243 }
4244 if (m_bPageLoadedOK) {
4245 if (!m_bAnnotsLoad) {
4246 if (!CheckPageAnnots(iPage, pHints)) {
4247 return FALSE;
4248 }
4249 m_bAnnotsLoad = TRUE;
4250 }
4251 }
4252 if (m_pPageDict && !m_bNeedDownLoadResource) {
4253 CPDF_Object *pRes = m_pPageDict->GetElement("Resource");
4254 if (!pRes) {
4255 m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
4256 }
4257 m_bNeedDownLoadResource = FALSE;
4258 }
4259 if (m_bNeedDownLoadResource) {
4260 FX_BOOL bRet = CheckResources(pHints);
4261 if (!bRet) {
4262 return FALSE;
4263 }
4264 m_bNeedDownLoadResource = FALSE;
4265 }
4266 m_bPageLoadedOK = FALSE;
4267 m_bAnnotsLoad = FALSE;
4268 m_bCurPageDictLoadOK = FALSE;
4269 ResetFirstCheck(iPage);
4270 m_pagesLoadState->SetAt(iPage, TRUE);
4271 return TRUE;
4272 }
CheckResources(IFX_DownloadHints * pHints)4273 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints)
4274 {
4275 if (!m_objs_array.GetSize()) {
4276 m_objs_array.RemoveAll();
4277 CFX_PtrArray obj_array;
4278 obj_array.Add(m_pPageResource);
4279 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4280 if (bRet) {
4281 m_objs_array.RemoveAll();
4282 }
4283 return bRet;
4284 } else {
4285 CFX_PtrArray new_objs_array;
4286 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4287 m_objs_array.RemoveAll();
4288 if (!bRet) {
4289 m_objs_array.Append(new_objs_array);
4290 }
4291 return bRet;
4292 }
4293 }
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,FX_DWORD * pSize)4294 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE *pPos, FX_DWORD *pSize)
4295 {
4296 if (pPos) {
4297 *pPos = m_dwLastXRefOffset;
4298 }
4299 if (pSize) {
4300 *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
4301 }
4302 }
IsFormAvail(IFX_DownloadHints * pHints)4303 FX_INT32 CPDF_DataAvail::IsFormAvail(IFX_DownloadHints *pHints)
4304 {
4305 if (!m_pDocument) {
4306 return PDFFORM_AVAIL;
4307 }
4308 if (!m_bLinearizedFormParamLoad) {
4309 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
4310 if (!pRoot) {
4311 return PDFFORM_AVAIL;
4312 }
4313 CPDF_Object *pAcroForm = pRoot->GetElement(FX_BSTRC("AcroForm"));
4314 if (!pAcroForm) {
4315 return PDFFORM_NOTEXIST;
4316 }
4317 if (!m_bMainXRefLoad && !CheckLinearizedData(pHints)) {
4318 return PDFFORM_NOTAVAIL;
4319 }
4320 if (!m_objs_array.GetSize()) {
4321 m_objs_array.Add(pAcroForm->GetDict());
4322 }
4323 m_bLinearizedFormParamLoad = TRUE;
4324 }
4325 CFX_PtrArray new_objs_array;
4326 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4327 m_objs_array.RemoveAll();
4328 if (!bRet) {
4329 m_objs_array.Append(new_objs_array);
4330 return PDFFORM_NOTAVAIL;
4331 }
4332 return PDFFORM_AVAIL;
4333 }
AddObjNum(FX_DWORD dwObjNum)4334 void CPDF_SortObjNumArray::AddObjNum(FX_DWORD dwObjNum)
4335 {
4336 FX_INT32 iNext = 0;
4337 if (BinarySearch(dwObjNum, iNext)) {
4338 return;
4339 }
4340 m_number_array.InsertAt(iNext, dwObjNum);
4341 }
Find(FX_DWORD dwObjNum)4342 FX_BOOL CPDF_SortObjNumArray::Find(FX_DWORD dwObjNum)
4343 {
4344 FX_INT32 iNext = 0;
4345 return BinarySearch(dwObjNum, iNext);
4346 }
BinarySearch(FX_DWORD value,FX_INT32 & iNext)4347 FX_BOOL CPDF_SortObjNumArray::BinarySearch(FX_DWORD value, FX_INT32 &iNext)
4348 {
4349 FX_INT32 iLen = m_number_array.GetSize();
4350 FX_INT32 iLow = 0;
4351 FX_INT32 iHigh = iLen - 1;
4352 FX_INT32 iMid = 0;
4353 while (iLow <= iHigh) {
4354 iMid = (iLow + iHigh) / 2;
4355 FX_DWORD tt = m_number_array.GetAt(iMid);
4356 if (m_number_array.GetAt(iMid) == value) {
4357 iNext = iMid;
4358 return TRUE;
4359 } else if (m_number_array.GetAt(iMid) > value) {
4360 iHigh = iMid - 1;
4361 } else if (m_number_array.GetAt(iMid) < value) {
4362 iLow = iMid + 1;
4363 }
4364 }
4365 iNext = iLow;
4366 return FALSE;
4367 }
~CPDF_PageNode()4368 CPDF_PageNode::~CPDF_PageNode()
4369 {
4370 FX_INT32 iSize = m_childNode.GetSize();
4371 for (FX_INT32 i = 0; i < iSize; ++i) {
4372 CPDF_PageNode *pNode = (CPDF_PageNode*)m_childNode[i];
4373 if (pNode) {
4374 delete pNode;
4375 }
4376 }
4377 m_childNode.RemoveAll();
4378 }
4379