1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8
9 #include <limits>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fxcrt/cfx_bitstream.h"
20 #include "core/fxcrt/fx_safe_types.h"
21 #include "third_party/base/numerics/safe_conversions.h"
22
23 namespace {
24
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)25 bool CanReadFromBitStream(const CFX_BitStream* hStream,
26 const FX_SAFE_UINT32& bits) {
27 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
28 }
29
// Validates a bit-count field taken from the page offset hint table header.
// The note for Table F.3 in the PDF 1.7 reference limits such values to the
// range 0 through 32; 0 is rejected here too because it is not useful.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  if (bits == 0)
    return false;
  return bits <= 32;
}
36
37 } // namespace
38
CPDF_HintTables(CPDF_ReadValidator * pValidator,CPDF_LinearizedHeader * pLinearized)39 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
40 CPDF_LinearizedHeader* pLinearized)
41 : m_pValidator(pValidator),
42 m_pLinearized(pLinearized),
43 m_nFirstPageSharedObjs(0),
44 m_szFirstPageObjOffset(0) {
45 ASSERT(m_pLinearized);
46 }
47
~CPDF_HintTables()48 CPDF_HintTables::~CPDF_HintTables() {}
49
GetItemLength(uint32_t index,const std::vector<FX_FILESIZE> & szArray) const50 uint32_t CPDF_HintTables::GetItemLength(
51 uint32_t index,
52 const std::vector<FX_FILESIZE>& szArray) const {
53 if (szArray.size() < 2 || index > szArray.size() - 2 ||
54 szArray[index] > szArray[index + 1]) {
55 return 0;
56 }
57 return szArray[index + 1] - szArray[index];
58 }
59
ReadPageHintTable(CFX_BitStream * hStream)60 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
61 if (!hStream || hStream->IsEOF())
62 return false;
63
64 int nStreamOffset = ReadPrimaryHintStreamOffset();
65 if (nStreamOffset < 0)
66 return false;
67
68 int nStreamLen = ReadPrimaryHintStreamLength();
69 if (nStreamLen < 1 ||
70 !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) {
71 return false;
72 }
73
74 const uint32_t kHeaderSize = 288;
75 if (hStream->BitsRemaining() < kHeaderSize)
76 return false;
77
78 // Item 1: The least number of objects in a page.
79 const uint32_t dwObjLeastNum = hStream->GetBits(32);
80 if (!dwObjLeastNum)
81 return false;
82
83 // Item 2: The location of the first page's page object.
84 const uint32_t dwFirstObjLoc = hStream->GetBits(32);
85 if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) {
86 FX_SAFE_FILESIZE safeLoc = nStreamLen;
87 safeLoc += dwFirstObjLoc;
88 if (!safeLoc.IsValid())
89 return false;
90 m_szFirstPageObjOffset = safeLoc.ValueOrDie();
91 } else {
92 if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc))
93 return false;
94 m_szFirstPageObjOffset = dwFirstObjLoc;
95 }
96
97 // Item 3: The number of bits needed to represent the difference
98 // between the greatest and least number of objects in a page.
99 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
100 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
101 return false;
102
103 // Item 4: The least length of a page in bytes.
104 const uint32_t dwPageLeastLen = hStream->GetBits(32);
105 if (!dwPageLeastLen)
106 return false;
107
108 // Item 5: The number of bits needed to represent the difference
109 // between the greatest and least length of a page, in bytes.
110 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
111 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
112 return false;
113
114 // Skip Item 6, 7, 8, 9 total 96 bits.
115 hStream->SkipBits(96);
116
117 // Item 10: The number of bits needed to represent the greatest
118 // number of shared object references.
119 const uint32_t dwSharedObjBits = hStream->GetBits(16);
120 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
121 return false;
122
123 // Item 11: The number of bits needed to represent the numerically
124 // greatest shared object identifier used by the pages.
125 const uint32_t dwSharedIdBits = hStream->GetBits(16);
126 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
127 return false;
128
129 // Item 12: The number of bits needed to represent the numerator of
130 // the fractional position for each shared object reference. For each
131 // shared object referenced from a page, there is an indication of
132 // where in the page's content stream the object is first referenced.
133 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
134 if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits))
135 return false;
136
137 // Item 13: Skip Item 13 which has 16 bits.
138 hStream->SkipBits(16);
139
140 const int nPages = GetNumberOfPages();
141 if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM)
142 return false;
143
144 const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages);
145 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
146 required_bits *= dwPages;
147 if (!CanReadFromBitStream(hStream, required_bits))
148 return false;
149
150 for (int i = 0; i < nPages; ++i) {
151 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
152 safeDeltaObj += dwObjLeastNum;
153 if (!safeDeltaObj.IsValid())
154 return false;
155 m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie());
156 }
157 hStream->ByteAlign();
158
159 required_bits = dwDeltaPageLenBits;
160 required_bits *= dwPages;
161 if (!CanReadFromBitStream(hStream, required_bits))
162 return false;
163
164 std::vector<uint32_t> dwPageLenArray;
165 for (int i = 0; i < nPages; ++i) {
166 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
167 safePageLen += dwPageLeastLen;
168 if (!safePageLen.IsValid())
169 return false;
170
171 dwPageLenArray.push_back(safePageLen.ValueOrDie());
172 }
173
174 int nOffsetE = GetEndOfFirstPageOffset();
175 if (nOffsetE < 0)
176 return false;
177
178 int nFirstPageNum = GetFirstPageNumber();
179 if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1)
180 return false;
181
182 for (int i = 0; i < nPages; ++i) {
183 if (i == nFirstPageNum) {
184 m_szPageOffsetArray.push_back(m_szFirstPageObjOffset);
185 } else if (i == nFirstPageNum + 1) {
186 if (i == 1) {
187 m_szPageOffsetArray.push_back(nOffsetE);
188 } else {
189 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] +
190 dwPageLenArray[i - 2]);
191 }
192 } else {
193 if (i == 0) {
194 m_szPageOffsetArray.push_back(nOffsetE);
195 } else {
196 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] +
197 dwPageLenArray[i - 1]);
198 }
199 }
200 }
201
202 m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] +
203 dwPageLenArray[nPages - 1]);
204 hStream->ByteAlign();
205
206 // Number of shared objects.
207 required_bits = dwSharedObjBits;
208 required_bits *= dwPages;
209 if (!CanReadFromBitStream(hStream, required_bits))
210 return false;
211
212 for (int i = 0; i < nPages; i++)
213 m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits));
214 hStream->ByteAlign();
215
216 // Array of identifiers, size = nshared_objects.
217 for (int i = 0; i < nPages; i++) {
218 required_bits = dwSharedIdBits;
219 required_bits *= m_dwNSharedObjsArray[i];
220 if (!CanReadFromBitStream(hStream, required_bits))
221 return false;
222
223 for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++)
224 m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits));
225 }
226 hStream->ByteAlign();
227
228 for (int i = 0; i < nPages; i++) {
229 FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i];
230 safeSize *= dwSharedNumeratorBits;
231 if (!CanReadFromBitStream(hStream, safeSize))
232 return false;
233
234 hStream->SkipBits(safeSize.ValueOrDie());
235 }
236 hStream->ByteAlign();
237
238 FX_SAFE_UINT32 safeTotalPageLen = dwPages;
239 safeTotalPageLen *= dwDeltaPageLenBits;
240 if (!CanReadFromBitStream(hStream, safeTotalPageLen))
241 return false;
242
243 hStream->SkipBits(safeTotalPageLen.ValueOrDie());
244 hStream->ByteAlign();
245 return true;
246 }
247
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)248 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
249 uint32_t offset) {
250 if (!hStream || hStream->IsEOF())
251 return false;
252
253 int nStreamOffset = ReadPrimaryHintStreamOffset();
254 int nStreamLen = ReadPrimaryHintStreamLength();
255 if (nStreamOffset < 0 || nStreamLen < 1)
256 return false;
257
258 FX_SAFE_UINT32 bit_offset = offset;
259 bit_offset *= 8;
260 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
261 return false;
262 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
263
264 const uint32_t kHeaderSize = 192;
265 if (hStream->BitsRemaining() < kHeaderSize)
266 return false;
267
268 // Item 1: The object number of the first object in the shared objects
269 // section.
270 uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
271
272 // Item 2: The location of the first object in the shared objects section.
273 uint32_t dwFirstSharedObjLoc = hStream->GetBits(32);
274 if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset))
275 dwFirstSharedObjLoc += nStreamLen;
276
277 // Item 3: The number of shared object entries for the first page.
278 m_nFirstPageSharedObjs = hStream->GetBits(32);
279
280 // Item 4: The number of shared object entries for the shared objects
281 // section, including the number of shared object entries for the first page.
282 uint32_t dwSharedObjTotal = hStream->GetBits(32);
283
284 // Item 5: The number of bits needed to represent the greatest number of
285 // objects in a shared object group. Skipped.
286 hStream->SkipBits(16);
287
288 // Item 6: The least length of a shared object group in bytes.
289 uint32_t dwGroupLeastLen = hStream->GetBits(32);
290
291 // Item 7: The number of bits needed to represent the difference between the
292 // greatest and least length of a shared object group, in bytes.
293 uint32_t dwDeltaGroupLen = hStream->GetBits(16);
294
295 // Trying to decode more than 32 bits isn't going to work when we write into
296 // a uint32_t.
297 if (dwDeltaGroupLen > 31)
298 return false;
299
300 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
301 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
302 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
303 return false;
304 }
305
306 int nFirstPageObjNum = GetFirstPageObjectNumber();
307 if (nFirstPageObjNum < 0)
308 return false;
309
310 uint32_t dwPrevObjLen = 0;
311 uint32_t dwCurObjLen = 0;
312 FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
313 required_bits *= dwDeltaGroupLen;
314 if (!CanReadFromBitStream(hStream, required_bits))
315 return false;
316
317 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
318 dwPrevObjLen = dwCurObjLen;
319 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
320 safeObjLen += dwGroupLeastLen;
321 if (!safeObjLen.IsValid())
322 return false;
323
324 dwCurObjLen = safeObjLen.ValueOrDie();
325 if (i < m_nFirstPageSharedObjs) {
326 m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i);
327 if (i == 0)
328 m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset);
329 } else {
330 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
331 safeObjNum += i - m_nFirstPageSharedObjs;
332 if (!safeObjNum.IsValid())
333 return false;
334
335 m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie());
336 if (i == m_nFirstPageSharedObjs) {
337 FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc;
338 if (!safeLoc.IsValid())
339 return false;
340
341 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
342 }
343 }
344
345 if (i != 0 && i != m_nFirstPageSharedObjs) {
346 FX_SAFE_FILESIZE safeLoc = dwPrevObjLen;
347 safeLoc += m_szSharedObjOffsetArray[i - 1];
348 if (!safeLoc.IsValid())
349 return false;
350
351 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
352 }
353 }
354
355 if (dwSharedObjTotal > 0) {
356 FX_SAFE_FILESIZE safeLoc = dwCurObjLen;
357 safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
358 if (!safeLoc.IsValid())
359 return false;
360
361 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
362 }
363
364 hStream->ByteAlign();
365 if (hStream->BitsRemaining() < dwSharedObjTotal)
366 return false;
367
368 hStream->SkipBits(dwSharedObjTotal);
369 hStream->ByteAlign();
370 return true;
371 }
372
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const373 bool CPDF_HintTables::GetPagePos(uint32_t index,
374 FX_FILESIZE* szPageStartPos,
375 FX_FILESIZE* szPageLength,
376 uint32_t* dwObjNum) const {
377 if (index >= m_pLinearized->GetPageCount())
378 return false;
379
380 *szPageStartPos = m_szPageOffsetArray[index];
381 *szPageLength = GetItemLength(index, m_szPageOffsetArray);
382
383 int nFirstPageObjNum = GetFirstPageObjectNumber();
384 if (nFirstPageObjNum < 0)
385 return false;
386
387 int nFirstPageNum = GetFirstPageNumber();
388 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
389 return false;
390
391 uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum);
392 if (index == dwFirstPageNum) {
393 *dwObjNum = nFirstPageObjNum;
394 return true;
395 }
396
397 // The object number of remaining pages starts from 1.
398 *dwObjNum = 1;
399 for (uint32_t i = 0; i < index; ++i) {
400 if (i == dwFirstPageNum)
401 continue;
402 *dwObjNum += m_dwDeltaNObjsArray[i];
403 }
404 return true;
405 }
406
CheckPage(uint32_t index)407 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
408 int nFirstPageNum = GetFirstPageNumber();
409 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
410 return CPDF_DataAvail::DataError;
411
412 if (index == static_cast<uint32_t>(nFirstPageNum))
413 return CPDF_DataAvail::DataAvailable;
414
415 uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray);
416 // If two pages have the same offset, it should be treated as an error.
417 if (!dwLength)
418 return CPDF_DataAvail::DataError;
419
420 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
421 m_szPageOffsetArray[index], dwLength))
422 return CPDF_DataAvail::DataNotAvailable;
423
424 // Download data of shared objects in the page.
425 uint32_t offset = 0;
426 for (uint32_t i = 0; i < index; ++i)
427 offset += m_dwNSharedObjsArray[i];
428
429 int nFirstPageObjNum = GetFirstPageObjectNumber();
430 if (nFirstPageObjNum < 0)
431 return CPDF_DataAvail::DataError;
432
433 uint32_t dwIndex = 0;
434 uint32_t dwObjNum = 0;
435 for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
436 dwIndex = m_dwIdentifierArray[offset + j];
437 if (dwIndex >= m_dwSharedObjNumArray.size())
438 return CPDF_DataAvail::DataNotAvailable;
439
440 dwObjNum = m_dwSharedObjNumArray[dwIndex];
441 if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) &&
442 dwObjNum <
443 static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) {
444 continue;
445 }
446
447 dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
448 // If two objects have the same offset, it should be treated as an error.
449 if (!dwLength)
450 return CPDF_DataAvail::DataError;
451
452 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
453 m_szSharedObjOffsetArray[dwIndex], dwLength)) {
454 return CPDF_DataAvail::DataNotAvailable;
455 }
456 }
457 return CPDF_DataAvail::DataAvailable;
458 }
459
LoadHintStream(CPDF_Stream * pHintStream)460 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
461 if (!pHintStream)
462 return false;
463
464 CPDF_Dictionary* pDict = pHintStream->GetDict();
465 CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
466 if (!pOffset || !pOffset->IsNumber())
467 return false;
468
469 int shared_hint_table_offset = pOffset->GetInteger();
470 if (shared_hint_table_offset <= 0)
471 return false;
472
473 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream);
474 pAcc->LoadAllDataFiltered();
475
476 uint32_t size = pAcc->GetSize();
477 // The header section of page offset hint table is 36 bytes.
478 // The header section of shared object hint table is 24 bytes.
479 // Hint table has at least 60 bytes.
480 const uint32_t kMinStreamLength = 60;
481 if (size < kMinStreamLength)
482 return false;
483
484 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
485 if (!safe_shared_hint_table_offset.IsValid() ||
486 size < safe_shared_hint_table_offset.ValueOrDie()) {
487 return false;
488 }
489
490 CFX_BitStream bs(pAcc->GetData(), size);
491 return ReadPageHintTable(&bs) &&
492 ReadSharedObjHintTable(&bs, shared_hint_table_offset);
493 }
494
GetEndOfFirstPageOffset() const495 int CPDF_HintTables::GetEndOfFirstPageOffset() const {
496 return static_cast<int>(m_pLinearized->GetFirstPageEndOffset());
497 }
498
GetNumberOfPages() const499 int CPDF_HintTables::GetNumberOfPages() const {
500 return static_cast<int>(m_pLinearized->GetPageCount());
501 }
502
GetFirstPageObjectNumber() const503 int CPDF_HintTables::GetFirstPageObjectNumber() const {
504 return static_cast<int>(m_pLinearized->GetFirstPageObjNum());
505 }
506
GetFirstPageNumber() const507 int CPDF_HintTables::GetFirstPageNumber() const {
508 return static_cast<int>(m_pLinearized->GetFirstPageNo());
509 }
510
ReadPrimaryHintStreamOffset() const511 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
512 return static_cast<int>(m_pLinearized->GetHintStart());
513 }
514
ReadPrimaryHintStreamLength() const515 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
516 return static_cast<int>(m_pLinearized->GetHintLength());
517 }
518