1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8
9 #include <limits>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
20 #include "core/fxcrt/cfx_bitstream.h"
21 #include "core/fxcrt/fx_safe_types.h"
22 #include "third_party/base/numerics/safe_conversions.h"
23 #include "third_party/base/ptr_util.h"
24 #include "third_party/base/span.h"
25
26 namespace {
27
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)28 bool CanReadFromBitStream(const CFX_BitStream* hStream,
29 const FX_SAFE_UINT32& bits) {
30 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
31 }
32
// Sanity check for the bit-width fields read from the hint table headers. The
// note in the PDF 1.7 reference for Table F.3 says the valid range is only 0
// through 32; 0 is rejected here as well because a zero-width field is not
// useful. Declared constexpr so the check can also be evaluated at compile
// time.
constexpr bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  return bits >= 1 && bits <= 32;
}
39
40 } // namespace
41
42 CPDF_HintTables::PageInfo::PageInfo() = default;
43 CPDF_HintTables::PageInfo::~PageInfo() = default;
44
45 // static
Parse(CPDF_SyntaxParser * parser,CPDF_LinearizedHeader * pLinearized)46 std::unique_ptr<CPDF_HintTables> CPDF_HintTables::Parse(
47 CPDF_SyntaxParser* parser,
48 CPDF_LinearizedHeader* pLinearized) {
49 ASSERT(parser);
50 if (!pLinearized || pLinearized->GetPageCount() <= 1 ||
51 !pLinearized->HasHintTable()) {
52 return nullptr;
53 }
54
55 const FX_FILESIZE szHintStart = pLinearized->GetHintStart();
56 const uint32_t szHintLength = pLinearized->GetHintLength();
57
58 if (!parser->GetValidator()->CheckDataRangeAndRequestIfUnavailable(
59 szHintStart, szHintLength)) {
60 return nullptr;
61 }
62
63 parser->SetPos(szHintStart);
64 RetainPtr<CPDF_Stream> hints_stream = ToStream(
65 parser->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose));
66
67 if (!hints_stream)
68 return nullptr;
69
70 auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(
71 parser->GetValidator().Get(), pLinearized);
72 if (!pHintTables->LoadHintStream(hints_stream.Get()))
73 return nullptr;
74
75 return pHintTables;
76 }
77
CPDF_HintTables(CPDF_ReadValidator * pValidator,CPDF_LinearizedHeader * pLinearized)78 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
79 CPDF_LinearizedHeader* pLinearized)
80 : m_pValidator(pValidator),
81 m_pLinearized(pLinearized),
82 m_nFirstPageSharedObjs(0),
83 m_szFirstPageObjOffset(0) {
84 ASSERT(m_pLinearized);
85 }
86
~CPDF_HintTables()87 CPDF_HintTables::~CPDF_HintTables() {}
88
ReadPageHintTable(CFX_BitStream * hStream)89 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
90 const uint32_t nPages = m_pLinearized->GetPageCount();
91 if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum)
92 return false;
93
94 const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo();
95 if (nFirstPageNum >= nPages)
96 return false;
97
98 if (!hStream || hStream->IsEOF())
99 return false;
100
101 const uint32_t kHeaderSize = 288;
102 if (hStream->BitsRemaining() < kHeaderSize)
103 return false;
104
105 // Item 1: The least number of objects in a page.
106 const uint32_t dwObjLeastNum = hStream->GetBits(32);
107 if (!dwObjLeastNum)
108 return false;
109
110 // Item 2: The location of the first page's page object.
111 const FX_FILESIZE szFirstObjLoc =
112 HintsOffsetToFileOffset(hStream->GetBits(32));
113 if (!szFirstObjLoc)
114 return false;
115
116 m_szFirstPageObjOffset = szFirstObjLoc;
117
118 // Item 3: The number of bits needed to represent the difference
119 // between the greatest and least number of objects in a page.
120 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
121 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
122 return false;
123
124 // Item 4: The least length of a page in bytes.
125 const uint32_t dwPageLeastLen = hStream->GetBits(32);
126 if (!dwPageLeastLen)
127 return false;
128
129 // Item 5: The number of bits needed to represent the difference
130 // between the greatest and least length of a page, in bytes.
131 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
132 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
133 return false;
134
135 // Skip Item 6, 7, 8, 9 total 96 bits.
136 hStream->SkipBits(96);
137
138 // Item 10: The number of bits needed to represent the greatest
139 // number of shared object references.
140 const uint32_t dwSharedObjBits = hStream->GetBits(16);
141 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
142 return false;
143
144 // Item 11: The number of bits needed to represent the numerically
145 // greatest shared object identifier used by the pages.
146 const uint32_t dwSharedIdBits = hStream->GetBits(16);
147 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
148 return false;
149
150 // Item 12: The number of bits needed to represent the numerator of
151 // the fractional position for each shared object reference. For each
152 // shared object referenced from a page, there is an indication of
153 // where in the page's content stream the object is first referenced.
154 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
155 if (dwSharedNumeratorBits > 32)
156 return false;
157
158 // Item 13: Skip Item 13 which has 16 bits.
159 hStream->SkipBits(16);
160
161 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
162 required_bits *= nPages;
163 if (!CanReadFromBitStream(hStream, required_bits))
164 return false;
165
166 m_PageInfos = std::vector<PageInfo>(nPages);
167 m_PageInfos[nFirstPageNum].set_start_obj_num(
168 m_pLinearized->GetFirstPageObjNum());
169 // The object number of remaining pages starts from 1.
170 uint32_t dwStartObjNum = 1;
171 for (uint32_t i = 0; i < nPages; ++i) {
172 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
173 safeDeltaObj += dwObjLeastNum;
174 if (!safeDeltaObj.IsValid())
175 return false;
176 m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie());
177 if (i == nFirstPageNum)
178 continue;
179 m_PageInfos[i].set_start_obj_num(dwStartObjNum);
180 dwStartObjNum += m_PageInfos[i].objects_count();
181 }
182 hStream->ByteAlign();
183
184 required_bits = dwDeltaPageLenBits;
185 required_bits *= nPages;
186 if (!CanReadFromBitStream(hStream, required_bits))
187 return false;
188
189 for (uint32_t i = 0; i < nPages; ++i) {
190 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
191 safePageLen += dwPageLeastLen;
192 if (!safePageLen.IsValid())
193 return false;
194 m_PageInfos[i].set_page_length(safePageLen.ValueOrDie());
195 }
196
197 ASSERT(m_szFirstPageObjOffset);
198 m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset);
199 FX_FILESIZE prev_page_end = m_pLinearized->GetFirstPageEndOffset();
200 for (uint32_t i = 0; i < nPages; ++i) {
201 if (i == nFirstPageNum)
202 continue;
203 m_PageInfos[i].set_page_offset(prev_page_end);
204 prev_page_end += m_PageInfos[i].page_length();
205 }
206 hStream->ByteAlign();
207
208 // Number of shared objects.
209 required_bits = dwSharedObjBits;
210 required_bits *= nPages;
211 if (!CanReadFromBitStream(hStream, required_bits))
212 return false;
213
214 std::vector<uint32_t> dwNSharedObjsArray(nPages);
215 for (uint32_t i = 0; i < nPages; i++)
216 dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits);
217 hStream->ByteAlign();
218
219 // Array of identifiers, size = nshared_objects.
220 for (uint32_t i = 0; i < nPages; i++) {
221 required_bits = dwSharedIdBits;
222 required_bits *= dwNSharedObjsArray[i];
223 if (!CanReadFromBitStream(hStream, required_bits))
224 return false;
225
226 for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++)
227 m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits));
228 }
229 hStream->ByteAlign();
230
231 if (dwSharedNumeratorBits) {
232 for (uint32_t i = 0; i < nPages; i++) {
233 FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i];
234 safeSize *= dwSharedNumeratorBits;
235 if (!CanReadFromBitStream(hStream, safeSize))
236 return false;
237
238 hStream->SkipBits(safeSize.ValueOrDie());
239 }
240 hStream->ByteAlign();
241 }
242
243 FX_SAFE_UINT32 safeTotalPageLen = nPages;
244 safeTotalPageLen *= dwDeltaPageLenBits;
245 if (!CanReadFromBitStream(hStream, safeTotalPageLen))
246 return false;
247
248 hStream->SkipBits(safeTotalPageLen.ValueOrDie());
249 hStream->ByteAlign();
250 return true;
251 }
252
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)253 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
254 uint32_t offset) {
255 if (!hStream || hStream->IsEOF())
256 return false;
257
258 FX_SAFE_UINT32 bit_offset = offset;
259 bit_offset *= 8;
260 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
261 return false;
262 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
263
264 const uint32_t kHeaderSize = 192;
265 if (hStream->BitsRemaining() < kHeaderSize)
266 return false;
267
268 // Item 1: The object number of the first object in the shared objects
269 // section.
270 uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
271 if (!dwFirstSharedObjNum)
272 return false;
273
274 // Item 2: The location of the first object in the shared objects section.
275 const FX_FILESIZE szFirstSharedObjLoc =
276 HintsOffsetToFileOffset(hStream->GetBits(32));
277 if (!szFirstSharedObjLoc)
278 return false;
279
280 // Item 3: The number of shared object entries for the first page.
281 m_nFirstPageSharedObjs = hStream->GetBits(32);
282
283 // Item 4: The number of shared object entries for the shared objects
284 // section, including the number of shared object entries for the first page.
285 uint32_t dwSharedObjTotal = hStream->GetBits(32);
286
287 // Item 5: The number of bits needed to represent the greatest number of
288 // objects in a shared object group.
289 uint32_t dwSharedObjNumBits = hStream->GetBits(16);
290 if (dwSharedObjNumBits > 32)
291 return false;
292
293 // Item 6: The least length of a shared object group in bytes.
294 uint32_t dwGroupLeastLen = hStream->GetBits(32);
295
296 // Item 7: The number of bits needed to represent the difference between the
297 // greatest and least length of a shared object group, in bytes.
298 uint32_t dwDeltaGroupLen = hStream->GetBits(16);
299
300 // Trying to decode more than 32 bits isn't going to work when we write into
301 // a uint32_t. Decoding 0 bits also makes no sense.
302 if (!IsValidPageOffsetHintTableBitCount(dwDeltaGroupLen))
303 return false;
304
305 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
306 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
307 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
308 return false;
309 }
310
311 FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
312 required_bits *= dwDeltaGroupLen;
313 if (!CanReadFromBitStream(hStream, required_bits))
314 return false;
315
316 if (dwSharedObjTotal > 0) {
317 uint32_t dwLastSharedObj = dwSharedObjTotal - 1;
318 if (dwLastSharedObj > m_nFirstPageSharedObjs) {
319 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
320 safeObjNum += dwLastSharedObj - m_nFirstPageSharedObjs;
321 if (!safeObjNum.IsValid())
322 return false;
323 }
324 }
325
326 m_SharedObjGroupInfos.resize(dwSharedObjTotal);
327 // Table F.6 – Shared object hint table, shared object group entries:
328 // Item 1: A number that, when added to the least shared object
329 // group length.
330 FX_SAFE_FILESIZE prev_shared_group_end_offset = m_szFirstPageObjOffset;
331 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
332 if (i == m_nFirstPageSharedObjs)
333 prev_shared_group_end_offset = szFirstSharedObjLoc;
334
335 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
336 safeObjLen += dwGroupLeastLen;
337 if (!safeObjLen.IsValid())
338 return false;
339
340 m_SharedObjGroupInfos[i].m_dwLength = safeObjLen.ValueOrDie();
341 m_SharedObjGroupInfos[i].m_szOffset =
342 prev_shared_group_end_offset.ValueOrDie();
343 prev_shared_group_end_offset += m_SharedObjGroupInfos[i].m_dwLength;
344 if (!prev_shared_group_end_offset.IsValid())
345 return false;
346 }
347
348 hStream->ByteAlign();
349 {
350 // Item 2: A flag indicating whether the shared object signature (item 3) is
351 // present.
352 uint32_t signature_count = 0;
353 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
354 signature_count += hStream->GetBits(1);
355 }
356 hStream->ByteAlign();
357 // Item 3: (Only if item 2 is 1) The shared object signature, a 16-byte MD5
358 // hash that uniquely identifies the resource that the group of objects
359 // represents.
360 if (signature_count) {
361 required_bits = signature_count;
362 required_bits *= 128;
363 if (!CanReadFromBitStream(hStream, required_bits))
364 return false;
365
366 hStream->SkipBits(required_bits.ValueOrDie());
367 hStream->ByteAlign();
368 }
369 }
370 // Item 4: A number equal to 1 less than the number of objects in the group.
371 FX_SAFE_UINT32 cur_obj_num = m_pLinearized->GetFirstPageObjNum();
372 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
373 if (i == m_nFirstPageSharedObjs)
374 cur_obj_num = dwFirstSharedObjNum;
375
376 FX_SAFE_UINT32 obj_count =
377 dwSharedObjNumBits ? hStream->GetBits(dwSharedObjNumBits) : 0;
378 obj_count += 1;
379 if (!obj_count.IsValid())
380 return false;
381
382 uint32_t obj_num = cur_obj_num.ValueOrDie();
383 cur_obj_num += obj_count.ValueOrDie();
384 if (!cur_obj_num.IsValid())
385 return false;
386
387 m_SharedObjGroupInfos[i].m_dwStartObjNum = obj_num;
388 m_SharedObjGroupInfos[i].m_dwObjectsCount = obj_count.ValueOrDie();
389 }
390
391 hStream->ByteAlign();
392 return true;
393 }
394
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const395 bool CPDF_HintTables::GetPagePos(uint32_t index,
396 FX_FILESIZE* szPageStartPos,
397 FX_FILESIZE* szPageLength,
398 uint32_t* dwObjNum) const {
399 if (index >= m_pLinearized->GetPageCount())
400 return false;
401
402 *szPageStartPos = m_PageInfos[index].page_offset();
403 *szPageLength = m_PageInfos[index].page_length();
404 *dwObjNum = m_PageInfos[index].start_obj_num();
405 return true;
406 }
407
CheckPage(uint32_t index)408 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
409 if (index == m_pLinearized->GetFirstPageNo())
410 return CPDF_DataAvail::DataAvailable;
411
412 if (index >= m_pLinearized->GetPageCount())
413 return CPDF_DataAvail::DataError;
414
415 const uint32_t dwLength = m_PageInfos[index].page_length();
416 if (!dwLength)
417 return CPDF_DataAvail::DataError;
418
419 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
420 m_PageInfos[index].page_offset(), dwLength)) {
421 return CPDF_DataAvail::DataNotAvailable;
422 }
423
424 // Download data of shared objects in the page.
425 for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) {
426 if (dwIndex >= m_SharedObjGroupInfos.size())
427 continue;
428 const SharedObjGroupInfo& shared_group_info =
429 m_SharedObjGroupInfos[dwIndex];
430
431 if (!shared_group_info.m_szOffset || !shared_group_info.m_dwLength)
432 return CPDF_DataAvail::DataError;
433
434 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
435 shared_group_info.m_szOffset, shared_group_info.m_dwLength)) {
436 return CPDF_DataAvail::DataNotAvailable;
437 }
438 }
439 return CPDF_DataAvail::DataAvailable;
440 }
441
LoadHintStream(CPDF_Stream * pHintStream)442 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
443 if (!pHintStream || !m_pLinearized->HasHintTable())
444 return false;
445
446 CPDF_Dictionary* pDict = pHintStream->GetDict();
447 CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
448 if (!pOffset || !pOffset->IsNumber())
449 return false;
450
451 int shared_hint_table_offset = pOffset->GetInteger();
452 if (shared_hint_table_offset <= 0)
453 return false;
454
455 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream);
456 pAcc->LoadAllDataFiltered();
457
458 uint32_t size = pAcc->GetSize();
459 // The header section of page offset hint table is 36 bytes.
460 // The header section of shared object hint table is 24 bytes.
461 // Hint table has at least 60 bytes.
462 const uint32_t kMinStreamLength = 60;
463 if (size < kMinStreamLength)
464 return false;
465
466 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
467 if (!safe_shared_hint_table_offset.IsValid() ||
468 size < safe_shared_hint_table_offset.ValueOrDie()) {
469 return false;
470 }
471
472 CFX_BitStream bs(pAcc->GetSpan().subspan(0, size));
473 return ReadPageHintTable(&bs) &&
474 ReadSharedObjHintTable(&bs, shared_hint_table_offset);
475 }
476
HintsOffsetToFileOffset(uint32_t hints_offset) const477 FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset(
478 uint32_t hints_offset) const {
479 FX_SAFE_FILESIZE file_offset = hints_offset;
480 if (!file_offset.IsValid())
481 return 0;
482
483 // The resulting positions shall be interpreted as if the primary hint stream
484 // itself were not present. That is, a position greater than the hint stream
485 // offset shall have the hint stream length added to it to determine the
486 // actual offset relative to the beginning of the file.
487 // See specification PDF 32000-1:2008 Annex F.4 (Hint tables).
488 // Note: The PDF spec does not mention this, but positions equal to the hint
489 // stream offset also need to have the hint stream length added to it. e.g.
490 // There exists linearized PDFs generated by Adobe software that have this
491 // property.
492 if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart())
493 file_offset += m_pLinearized->GetHintLength();
494
495 return file_offset.ValueOrDefault(0);
496 }
497