• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8 
9 #include <limits>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_parser.h"
17 #include "core/fpdfapi/parser/cpdf_read_validator.h"
18 #include "core/fpdfapi/parser/cpdf_stream.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
21 #include "core/fxcrt/cfx_bitstream.h"
22 #include "core/fxcrt/check.h"
23 #include "core/fxcrt/fx_safe_types.h"
24 #include "core/fxcrt/span.h"
25 
26 namespace {
27 
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)28 bool CanReadFromBitStream(const CFX_BitStream* hStream,
29                           const FX_SAFE_UINT32& bits) {
30   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
31 }
32 
// Validates a bit-count field taken from the page table header. According to
// the note for Table F.3 in the PDF 1.7 reference, only 0 through 32 are
// legal; 0 is rejected as well because a zero-width field is not useful.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  if (bits == 0)
    return false;
  return bits <= 32;
}
39 
40 }  // namespace
41 
// PageInfo's constructor and destructor are compiler-generated; they are
// defaulted here, out of line, rather than in the class definition.
42 CPDF_HintTables::PageInfo::PageInfo() = default;
43 CPDF_HintTables::PageInfo::~PageInfo() = default;
44 
45 //  static
Parse(CPDF_SyntaxParser * parser,const CPDF_LinearizedHeader * pLinearized)46 std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
47     CPDF_SyntaxParser* parser,
48     const CPDF_LinearizedHeader* pLinearized) {
49   DCHECK(parser);
50   if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
51       !pLinearized->HasHintTable()) {
52     return nullptr;
53   }
54 
55   const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
56   const uint32_t szHintLength = pLinearized->GetHintLength();
57 
58   if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
59           szHintStart, szHintLength)) {
60     return nullptr;
61   }
62 
63   parser->SetPos(szHintStart);
64   RetainPtr<CPDF_Stream> hints_stream = ToStream(
65       parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
66 
67   if (!hints_stream)
68     return nullptr;
69 
70   auto pHintTables = std::make_unique<CPDF_HintTables>(
71       parser->GetValidator().Get(), pLinearized);
72   if (!pHintTables->LoadHintStream(hints_stream.Get()))
73     return nullptr;
74 
75   return pHintTables;
76 }
77 
CPDF_HintTables(CPDF_ReadValidator * pValidator,const CPDF_LinearizedHeader * pLinearized)78 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
79                                  const CPDF_LinearizedHeader* pLinearized)
80     : m_pValidator(pValidator), m_pLinearized(pLinearized) {
81   DCHECK(m_pLinearized);
82 }
83 
// Compiler-generated destructor, defaulted out of line.
84 CPDF_HintTables::~CPDF_HintTables() = default;
85 
ReadPageHintTable(CFX_BitStream * hStream)86 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
87   const uint32_t nPages = m_pLinearized->GetPageCount();
88   if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
89     return false;
90 
91   const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
92   if (nFirstPageNum >= nPages)
93     return false;
94 
95   if (!hStream || hStream->IsEOF())
96     return false;
97 
98   const uint32_t kHeaderSize = 288;
99   if (hStream->BitsRemaining() < kHeaderSize)
100     return false;
101 
102   // Item 1: The least number of objects in a page.
103   const uint32_t dwObjLeastNum = hStream->GetBits(32);
104   if (!dwObjLeastNum || dwObjLeastNum >= CPDF_Parser::kMaxObjectNumber)
105     return false;
106 
107   // Item 2: The location of the first page's page object.
108   const FX_FILESIZE szFirstObjLoc =
109       HintsOffsetToFileOffset(hStream->GetBits(32));
110   if (!szFirstObjLoc)
111     return false;
112 
113   m_szFirstPageObjOffset = szFirstObjLoc;
114 
115   // Item 3: The number of bits needed to represent the difference
116   // between the greatest and least number of objects in a page.
117   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
118   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
119     return false;
120 
121   // Item 4: The least length of a page in bytes.
122   const uint32_t dwPageLeastLen = hStream->GetBits(32);
123   if (!dwPageLeastLen)
124     return false;
125 
126   // Item 5: The number of bits needed to represent the difference
127   // between the greatest and least length of a page, in bytes.
128   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
129   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
130     return false;
131 
132   // Skip Item 6, 7, 8, 9 total 96 bits.
133   hStream->SkipBits(96);
134 
135   // Item 10: The number of bits needed to represent the greatest
136   // number of shared object references.
137   const uint32_t dwSharedObjBits = hStream->GetBits(16);
138   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
139     return false;
140 
141   // Item 11: The number of bits needed to represent the numerically
142   // greatest shared object identifier used by the pages.
143   const uint32_t dwSharedIdBits = hStream->GetBits(16);
144   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
145     return false;
146 
147   // Item 12: The number of bits needed to represent the numerator of
148   // the fractional position for each shared object reference. For each
149   // shared object referenced from a page, there is an indication of
150   // where in the page's content stream the object is first referenced.
151   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
152   if (dwSharedNumeratorBits > 32)
153     return false;
154 
155   // Item 13: Skip Item 13 which has 16 bits.
156   hStream->SkipBits(16);
157 
158   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
159   required_bits *= nPages;
160   if (!CanReadFromBitStream(hStream, required_bits))
161     return false;
162 
163   m_PageInfos = std::vector<PageInfo>(nPages);
164   m_PageInfos[nFirstPageNum].set_start_obj_num(
165       m_pLinearized->GetFirstPageObjNum());
166   // The object number of remaining pages starts from 1.
167   FX_SAFE_UINT32 dwStartObjNum = 1;
168   for (uint32_t i = 0; i < nPages; ++i) {
169     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
170     safeDeltaObj += dwObjLeastNum;
171     if (!safeDeltaObj.IsValid())
172       return false;
173     m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
174     if (i == nFirstPageNum)
175       continue;
176     m_PageInfos[i].set_start_obj_num(dwStartObjNum.ValueOrDie());
177     dwStartObjNum += m_PageInfos[i].objects_count();
178     if (!dwStartObjNum.IsValid() ||
179         dwStartObjNum.ValueOrDie() >= CPDF_Parser::kMaxObjectNumber) {
180       return false;
181     }
182   }
183   hStream->ByteAlign();
184 
185   required_bits = dwDeltaPageLenBits;
186   required_bits *= nPages;
187   if (!CanReadFromBitStream(hStream, required_bits))
188     return false;
189 
190   for (uint32_t i = 0; i < nPages; ++i) {
191     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
192     safePageLen += dwPageLeastLen;
193     if (!safePageLen.IsValid())
194       return false;
195     m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
196   }
197 
198   DCHECK(m_szFirstPageObjOffset);
199   m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
200   FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
201   for (uint32_t i = 0; i < nPages; ++i) {
202     if (i == nFirstPageNum)
203       continue;
204     m_PageInfos[i].set_page_offset(prev_page_end);
205     prev_page_end += m_PageInfos[i].page_length();
206   }
207   hStream->ByteAlign();
208 
209   // Number of shared objects.
210   required_bits = dwSharedObjBits;
211   required_bits *= nPages;
212   if (!CanReadFromBitStream(hStream, required_bits))
213     return false;
214 
215   std::vector<uint32_t> dwNSharedObjsArray(nPages);
216   for (uint32_t i = 0; i < nPages; i++)
217     dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
218   hStream->ByteAlign();
219 
220   // Array of identifiers, size = nshared_objects.
221   for (uint32_t i = 0; i < nPages; i++) {
222     required_bits = dwSharedIdBits;
223     required_bits *= dwNSharedObjsArray[i];
224     if (!CanReadFromBitStream(hStream, required_bits))
225       return false;
226 
227     for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
228       m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
229   }
230   hStream->ByteAlign();
231 
232   if (dwSharedNumeratorBits) {
233     for (uint32_t i = 0; i < nPages; i++) {
234       FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
235       safeSize *= dwSharedNumeratorBits;
236       if (!CanReadFromBitStream(hStream, safeSize))
237         return false;
238 
239       hStream->SkipBits(safeSize.ValueOrDie());
240     }
241     hStream->ByteAlign();
242   }
243 
244   FX_SAFE_UINT32 safeTotalPageLen = nPages;
245   safeTotalPageLen *= dwDeltaPageLenBits;
246   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
247     return false;
248 
249   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
250   hStream->ByteAlign();
251   return true;
252 }
253 
// Parses the shared object hint table that starts `offset` bytes into the
// data behind `hStream`.
//
// On success this sets `m_nFirstPageSharedObjs` and fills
// `m_SharedObjGroupInfos` with one entry per shared object group (length,
// file offset, first object number and object count).
//
// Returns false when the stream is null/exhausted, when the stream position
// is already past `offset`, when a header field fails validation, when the
// stream is too short for the announced data, or when a derived value
// overflows.
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)254 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
255                                              uint32_t offset) {
256   if (!hStream || hStream->IsEOF())
257     return false;
258
  // Convert the byte offset into a bit position and advance to it. The
  // stream cannot be rewound, so a position already beyond it is an error.
259   FX_SAFE_UINT32 bit_offset = offset;
260   bit_offset *= 8;
261   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
262     return false;
263   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
264
  // Header items 1-7 read below: 32+32+32+32+16+32+16 = 192 bits.
265   const uint32_t kHeaderSize = 192;
266   if (hStream->BitsRemaining() < kHeaderSize)
267     return false;
268
269   // Item 1: The object number of the first object in the shared objects
270   // section.
271   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
272   if (!dwFirstSharedObjNum)
273     return false;
274
275   // Item 2: The location of the first object in the shared objects section.
276   const FX_FILESIZE szFirstSharedObjLoc =
277       HintsOffsetToFileOffset(hStream->GetBits(32));
278   if (!szFirstSharedObjLoc)
279     return false;
280
281   // Item 3: The number of shared object entries for the first page.
282   m_nFirstPageSharedObjs = hStream->GetBits(32);
283
284   // Item 4: The number of shared object entries for the shared objects
285   // section, including the number of shared object entries for the first page.
286   uint32_t dwSharedObjTotal = hStream->GetBits(32);
287
288   // Item 5: The number of bits needed to represent the greatest number of
289   // objects in a shared object group.
  // A width of 0 is accepted here; every group then holds exactly one object
  // (see the item 4 loop near the end of this function).
290   uint32_t dwSharedObjNumBits = hStream->GetBits(16);
291   if (dwSharedObjNumBits > 32)
292     return false;
293
294   // Item 6: The least length of a shared object group in bytes.
295   uint32_t dwGroupLeastLen = hStream->GetBits(32);
296
297   // Item 7: The number of bits needed to represent the difference between the
298   // greatest and least length of a shared object group, in bytes.
299   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
300
301   // Trying to decode more than 32 bits isn't going to work when we write into
302   // a uint32_t. Decoding 0 bits also makes no sense.
303   if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
304     return false;
305
  // Reject object numbers / entry counts at or above the parser's limit.
306   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
307       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
308       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
309     return false;
310   }
311
  // All group length deltas must be present before the table is sized.
312   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
313   required_bits *= dwDeltaGroupLen;
314   if (!CanReadFromBitStream(hStream, required_bits))
315     return false;
316
  // Overflow check only: the sum of the first shared object number and the
  // index distance to the last group must fit in 32 bits. The sum itself is
  // not used afterwards.
317   if (dwSharedObjTotal > 0) {
318     uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
319     if (dwLastSharedObj > m_nFirstPageSharedObjs) {
320       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
321       safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
322       if (!safeObjNum.IsValid())
323         return false;
324     }
325   }
326
327   m_SharedObjGroupInfos.resize(dwSharedObjTotal);
328   // Table F.6 - Shared object hint table, shared object group entries:
329   // Item 1: A number that, when added to the least shared object
330   // group length.
  // Groups are laid out back to back: those before index
  // `m_nFirstPageSharedObjs` start at the first page object offset, the rest
  // restart at the shared objects section location from header item 2.
331   FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
332   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
333     if (i == m_nFirstPageSharedObjs)
334       prev_shared_group_end_offset = szFirstSharedObjLoc;
335
336     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
337     safeObjLen += dwGroupLeastLen;
338     if (!safeObjLen.IsValid())
339       return false;
340
341     m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
342     m_SharedObjGroupInfos[i].m_szOffset =
343         prev_shared_group_end_offset.ValueOrDie();
344     prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
345     if (!prev_shared_group_end_offset.IsValid())
346       return false;
347   }
348
349   hStream->ByteAlign();
350   {
351     // Item 2: A flag indicating whether the shared object signature (item 3) is
352     // present.
    // One flag bit per group; the flags are only counted so the signatures
    // that follow can be skipped.
353     uint32_t signature_count = 0;
354     for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
355       signature_count += hStream->GetBits(1);
356     }
357     hStream->ByteAlign();
358     // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
359     // hash that uniquely identifies the resource that the group of objects
360     // represents.
361     if (signature_count) {
362       required_bits = signature_count;
363       required_bits *= 128;
364       if (!CanReadFromBitStream(hStream, required_bits))
365         return false;
366
367       hStream->SkipBits(required_bits.ValueOrDie());
368       hStream->ByteAlign();
369     }
370   }
371   // Item 4: A number equal to 1 less than the number of objects in the group.
  // Object numbers are assigned consecutively: first-page groups count up
  // from the first page object number, the remaining groups from
  // `dwFirstSharedObjNum`.
372   FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
373   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
374     if (i == m_nFirstPageSharedObjs)
375       cur_obj_num = dwFirstSharedObjNum;
376
377     FX_SAFE_UINT32 obj_count =
378         dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
379     obj_count += 1;
380     if (!obj_count.IsValid())
381       return false;
382
383     uint32_t obj_num = cur_obj_num.ValueOrDie();
384     cur_obj_num += obj_count.ValueOrDie();
385     if (!cur_obj_num.IsValid())
386       return false;
387
388     m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
389     m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
390   }
391
392   hStream->ByteAlign();
393   return true;
394 }
395 
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const396 bool CPDF_HintTables::GetPagePos(uint32_t index,
397                                  FX_FILESIZE* szPageStartPos,
398                                  FX_FILESIZE* szPageLength,
399                                  uint32_t* dwObjNum) const {
400   if (index >= m_pLinearized->GetPageCount())
401     return false;
402 
403   *szPageStartPos = m_PageInfos[index].page_offset();
404   *szPageLength = m_PageInfos[index].page_length();
405   *dwObjNum = m_PageInfos[index].start_obj_num();
406   return true;
407 }
408 
CheckPage(uint32_t index)409 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
410   if (index == m_pLinearized->GetFirstPageNo())
411     return CPDF_DataAvail::kDataAvailable;
412 
413   if (index >= m_pLinearized->GetPageCount())
414     return CPDF_DataAvail::kDataError;
415 
416   const uint32_t dwLength = m_PageInfos[index].page_length();
417   if (!dwLength)
418     return CPDF_DataAvail::kDataError;
419 
420   if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
421           m_PageInfos[index].page_offset(), dwLength)) {
422     return CPDF_DataAvail::kDataNotAvailable;
423   }
424 
425   // Download data of shared objects in the page.
426   for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
427     if (dwIndex >= m_SharedObjGroupInfos.size())
428       continue;
429     const SharedObjGroupInfo& shared_group_info =
430         m_SharedObjGroupInfos[dwIndex];
431 
432     if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
433       return CPDF_DataAvail::kDataError;
434 
435     if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
436             shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
437       return CPDF_DataAvail::kDataNotAvailable;
438     }
439   }
440   return CPDF_DataAvail::kDataAvailable;
441 }
442 
LoadHintStream(CPDF_Stream * pHintStream)443 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
444   if (!pHintStream || !m_pLinearized->HasHintTable())
445     return false;
446 
447   RetainPtr<const CPDF_Object> pOffset =
448       pHintStream->GetDict()->GetObjectFor("S");
449   if (!pOffset || !pOffset->IsNumber())
450     return false;
451 
452   int shared_hint_table_offset = pOffset->GetInteger();
453   if (shared_hint_table_offset <= 0)
454     return false;
455 
456   auto pAcc =
457       pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pHintStream));
458   pAcc->LoadAllDataFiltered();
459 
460   uint32_t size = pAcc->GetSize();
461   // The header section of page offset hint table is 36 bytes.
462   // The header section of shared object hint table is 24 bytes.
463   // Hint table has at least 60 bytes.
464   const uint32_t kMinStreamLength = 60;
465   if (size < kMinStreamLength)
466     return false;
467 
468   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
469   if (!safe_shared_hint_table_offset.IsValid() ||
470       size < safe_shared_hint_table_offset.ValueOrDie()) {
471     return false;
472   }
473 
474   CFX_BitStream bs(pAcc->GetSpan().first(size));
475   return ReadPageHintTable(&bs) &&
476          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
477 }
478 
HintsOffsetToFileOffset(uint32_t hints_offset) const479 FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
480     uint32_t hints_offset) const {
481   FX_SAFE_FILESIZE file_offset = hints_offset;
482   if (!file_offset.IsValid())
483     return 0;
484 
485   // The resulting positions shall be interpreted as if the primary hint stream
486   // itself were not present. That is, a position greater than the hint stream
487   // offset shall have the hint stream length added to it to determine the
488   // actual offset relative to the beginning of the file.
489   // See ISO 32000-1:2008 spec, annex F.4 (Hint tables).
490   // Note: The PDF spec does not mention this, but positions equal to the hint
491   // stream offset also need to have the hint stream length added to it. e.g.
492   // There exists linearized PDFs generated by Adobe software that have this
493   // property.
494   if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
495     file_offset += m_pLinearized->GetHintLength();
496 
497   return file_offset.ValueOrDefault(0);
498 }
499