1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/parser/cpdf_parser.h"
6
7 #include <array>
8 #include <limits>
9 #include <memory>
10 #include <ostream>
11 #include <string>
12 #include <utility>
13 #include <vector>
14
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
17 #include "core/fpdfapi/parser/cpdf_object.h"
18 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
19 #include "core/fxcrt/cfx_read_only_span_stream.h"
20 #include "core/fxcrt/fx_extension.h"
21 #include "core/fxcrt/fx_stream.h"
22 #include "core/fxcrt/retain_ptr.h"
23 #include "core/fxcrt/stl_util.h"
24 #include "testing/gmock/include/gmock/gmock.h"
25 #include "testing/gtest/include/gtest/gtest.h"
26 #include "testing/utils/path_service.h"
27
28 using testing::ElementsAre;
29 using testing::Pair;
30 using testing::Return;
31
32 namespace {
33
// One expected cross-reference entry for the table-driven checks in this
// file: the file offset and the object type a parsed entry should report.
struct OffsetAndType {
  // Compared against the parsed entry's `pos`.
  FX_FILESIZE offset;
  // Compared against the parsed entry's `type`.
  CPDF_CrossRefTable::ObjectType type;
};
38
GetObjInfo(const CPDF_Parser & parser,uint32_t obj_num)39 CPDF_CrossRefTable::ObjectInfo GetObjInfo(const CPDF_Parser& parser,
40 uint32_t obj_num) {
41 const auto* info =
42 parser.GetCrossRefTableForTesting()->GetObjectInfo(obj_num);
43 return info ? *info : CPDF_CrossRefTable::ObjectInfo();
44 }
45
46 class TestObjectsHolder final : public CPDF_Parser::ParsedObjectsHolder {
47 public:
48 TestObjectsHolder() = default;
49 ~TestObjectsHolder() override = default;
50
51 // CPDF_Parser::ParsedObjectsHolder:
TryInit()52 bool TryInit() override { return true; }
53 MOCK_METHOD(RetainPtr<CPDF_Object>, ParseIndirectObject, (uint32_t objnum));
54 };
55
56 } // namespace
57
58 // Test-only helper to support Gmock. Cannot be in an anonymous namespace.
operator ==(const CPDF_CrossRefTable::ObjectInfo & lhs,const CPDF_CrossRefTable::ObjectInfo & rhs)59 bool operator==(const CPDF_CrossRefTable::ObjectInfo& lhs,
60 const CPDF_CrossRefTable::ObjectInfo& rhs) {
61 if (lhs.type != rhs.type) {
62 return false;
63 }
64
65 if (lhs.gennum != rhs.gennum) {
66 return false;
67 }
68
69 switch (lhs.type) {
70 case CPDF_CrossRefTable::ObjectType::kFree:
71 return true;
72 case CPDF_CrossRefTable::ObjectType::kNormal:
73 return lhs.pos == rhs.pos;
74 case CPDF_CrossRefTable::ObjectType::kCompressed:
75 return lhs.archive.obj_num == rhs.archive.obj_num &&
76 lhs.archive.obj_index == rhs.archive.obj_index;
77 }
78 }
79
80 // Test-only helper to let Gmock pretty-print `info`. Cannot be in an anonymous
81 // namespace.
operator <<(std::ostream & os,const CPDF_CrossRefTable::ObjectInfo & info)82 std::ostream& operator<<(std::ostream& os,
83 const CPDF_CrossRefTable::ObjectInfo& info) {
84 os << "(";
85 switch (info.type) {
86 case CPDF_CrossRefTable::ObjectType::kFree:
87 os << "Free object";
88 break;
89 case CPDF_CrossRefTable::ObjectType::kNormal:
90 os << "Normal object, pos: " << info.pos
91 << ", obj_stream=" << info.is_object_stream_flag;
92 break;
93 case CPDF_CrossRefTable::ObjectType::kCompressed:
94 os << "Compressed object, archive obj_num: " << info.archive.obj_num
95 << ", archive obj_index: " << info.archive.obj_index;
96 break;
97 }
98 os << ", gennum: " << info.gennum << ")";
99 return os;
100 }
101
102 // A wrapper class to help test member functions of CPDF_Parser.
103 class CPDF_TestParser final : public CPDF_Parser {
104 public:
CPDF_TestParser()105 CPDF_TestParser() : CPDF_Parser(&object_holder_) {}
106 ~CPDF_TestParser() = default;
107
108 // Setup reading from a file and initial states.
InitTestFromFile(const char * path)109 bool InitTestFromFile(const char* path) {
110 RetainPtr<IFX_SeekableReadStream> pFileAccess =
111 IFX_SeekableReadStream::CreateFromFilename(path);
112 if (!pFileAccess)
113 return false;
114
115 // For the test file, the header is set at the beginning.
116 SetSyntaxParserForTesting(
117 std::make_unique<CPDF_SyntaxParser>(std::move(pFileAccess)));
118 return true;
119 }
120
121 // Setup reading from a buffer and initial states.
InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,FX_FILESIZE header_offset)122 bool InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,
123 FX_FILESIZE header_offset) {
124 SetSyntaxParserForTesting(CPDF_SyntaxParser::CreateForTesting(
125 pdfium::MakeRetain<CFX_ReadOnlySpanStream>(buffer), header_offset));
126 return true;
127 }
128
InitTestFromBuffer(pdfium::span<const uint8_t> buffer)129 bool InitTestFromBuffer(pdfium::span<const uint8_t> buffer) {
130 return InitTestFromBufferWithOffset(buffer, 0 /*header_offset*/);
131 }
132
133 // Expose protected CPDF_Parser methods for testing.
134 using CPDF_Parser::LoadCrossRefTable;
135 using CPDF_Parser::ParseLinearizedHeader;
136 using CPDF_Parser::ParseStartXRef;
137 using CPDF_Parser::RebuildCrossRef;
138 using CPDF_Parser::StartParseInternal;
139
object_holder()140 TestObjectsHolder& object_holder() { return object_holder_; }
141
142 private:
143 TestObjectsHolder object_holder_;
144 };
145
// Rebuilding the cross-reference table of a well-formed file succeeds and
// yields the expected position and generation number for objects 0..6.
TEST(ParserTest, RebuildCrossRefCorrectly) {
  const std::string pdf_path =
      PathService::GetTestFilePath("parser_rebuildxref_correct.pdf");
  ASSERT_FALSE(pdf_path.empty());

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;
  ASSERT_TRUE(parser.RebuildCrossRef());

  // Both tables are indexed by object number.
  constexpr std::array<FX_FILESIZE, 7> kExpectedOffsets = {
      {0, 15, 61, 154, 296, 374, 450}};
  constexpr std::array<uint16_t, 7> kExpectedGenNums = {{0, 0, 2, 4, 6, 8, 0}};

  for (size_t obj_num = 0; obj_num < kExpectedOffsets.size(); ++obj_num) {
    EXPECT_EQ(kExpectedOffsets[obj_num], GetObjInfo(parser, obj_num).pos);
  }
  for (size_t obj_num = 0; obj_num < kExpectedGenNums.size(); ++obj_num) {
    EXPECT_EQ(kExpectedGenNums[obj_num], GetObjInfo(parser, obj_num).gennum);
  }

  const CPDF_CrossRefTable* table = parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->trailer_object_number());
}
169
// Rebuilding the cross-reference table must fail for the
// "parser_rebuildxref_error_notrailer.pdf" test file.
TEST(ParserTest, RebuildCrossRefFailed) {
  const std::string pdf_path =
      PathService::GetTestFilePath("parser_rebuildxref_error_notrailer.pdf");
  ASSERT_FALSE(pdf_path.empty());

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_FALSE(parser.RebuildCrossRef());
}
179
// Feeds several classic "xref" tables to LoadCrossRefTable() and checks the
// position and type recorded for every object number up to the last one the
// table mentions. Object numbers skipped by the table are expected to read
// back as free entries at position 0.
TEST(ParserTest, LoadCrossRefTable) {
  using ObjectType = CPDF_CrossRefTable::ObjectType;

  // A single contiguous subsection covering objects 0..5.
  {
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 6 \n"
        "0000000003 65535 f \n"
        "0000000017 00000 n \n"
        "0000000081 00000 n \n"
        "0000000000 00007 f \n"
        "0000000331 00000 n \n"
        "0000000409 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
        {0, ObjectType::kFree},
        {17, ObjectType::kNormal},
        {81, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {331, ObjectType::kNormal},
        {409, ObjectType::kNormal},
    });
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
    ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
    for (size_t obj_num = 0; obj_num < std::size(kExpected); ++obj_num) {
      const OffsetAndType& want = kExpected[obj_num];
      EXPECT_EQ(want.offset, GetObjInfo(parser, obj_num).pos);
      EXPECT_EQ(want.type, GetObjInfo(parser, obj_num).type);
    }
  }

  // Several subsections with gaps between them.
  {
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000025518 00002 n \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25325, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25518, ObjectType::kNormal},
        {25635, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25777, ObjectType::kNormal},
    });
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
    ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
    for (size_t obj_num = 0; obj_num < std::size(kExpected); ++obj_num) {
      const OffsetAndType& want = kExpected[obj_num];
      EXPECT_EQ(want.offset, GetObjInfo(parser, obj_num).pos);
      EXPECT_EQ(want.type, GetObjInfo(parser, obj_num).type);
    }
  }

  // Same layout as above, but object 8 is explicitly marked free.
  {
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000000000 65535 f \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25325, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25635, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {25777, ObjectType::kNormal},
    });
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
    ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
    for (size_t obj_num = 0; obj_num < std::size(kExpected); ++obj_num) {
      const OffsetAndType& want = kExpected[obj_num];
      EXPECT_EQ(want.offset, GetObjInfo(parser, obj_num).pos);
      EXPECT_EQ(want.type, GetObjInfo(parser, obj_num).type);
    }
  }

  // Free entries whose first field is non-zero still read back with
  // position 0.
  {
    static const unsigned char kXrefTable[] =
        "xref \n"
        "0 7 \n"
        "0000000002 65535 f \n"
        "0000000023 00000 n \n"
        "0000000003 65535 f \n"
        "0000000004 65535 f \n"
        "0000000000 65535 f \n"
        "0000000045 00000 n \n"
        "0000000179 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
        {0, ObjectType::kFree},
        {23, ObjectType::kNormal},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {0, ObjectType::kFree},
        {45, ObjectType::kNormal},
        {179, ObjectType::kNormal},
    });
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
    ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
    for (size_t obj_num = 0; obj_num < std::size(kExpected); ++obj_num) {
      const OffsetAndType& want = kExpected[obj_num];
      EXPECT_EQ(want.offset, GetObjInfo(parser, obj_num).pos);
      EXPECT_EQ(want.type, GetObjInfo(parser, obj_num).type);
    }
  }

  {
    // Regression test for https://crbug.com/945624 - Make sure the parser
    // can correctly handle table sizes that are multiples of the read size,
    // which is 1024.
    std::string xref_table = "xref \n 0 2048 \n";
    xref_table.reserve(41000);
    // Entry N (0-based) is a normal object at offset N + 1.
    for (int offset = 1; offset <= 2048; ++offset) {
      char entry[21];
      snprintf(entry, sizeof(entry), "%010d 00000 n \n", offset);
      xref_table += entry;
    }
    xref_table += "trail";  // Needed to end cross ref table reading.

    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(
        ByteStringView(xref_table.c_str()).unsigned_span()));
    ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
    for (size_t obj_num = 0; obj_num < 2048; ++obj_num) {
      EXPECT_EQ(static_cast<int>(obj_num) + 1, GetObjInfo(parser, obj_num).pos);
      EXPECT_EQ(ObjectType::kNormal, GetObjInfo(parser, obj_num).type);
    }
  }
}
332
// ParseStartXRef() reports the expected position for the test file, and the
// indirect object found at that position is object number 75.
TEST(ParserTest, ParseStartXRef) {
  const std::string pdf_path =
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf");
  ASSERT_FALSE(pdf_path.empty());

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  EXPECT_EQ(100940, parser.ParseStartXRef());

  RetainPtr<CPDF_Object> xref_stream =
      parser.ParseIndirectObjectAtForTesting(100940);
  ASSERT_TRUE(xref_stream);
  EXPECT_EQ(75u, xref_stream->GetObjNum());
}
346
// Same checks as ParseStartXRef, but the file contents are placed
// `kTestHeaderOffset` bytes into a larger buffer and the parser is told about
// that header offset. The expected values are the same as in the
// non-offset test.
TEST(ParserTest, ParseStartXRefWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file =
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf");
  ASSERT_FALSE(test_file.empty());
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The vector is value-initialized, so the bytes in front of the file
  // contents are zero.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the init result like every other test in this file does, so a
  // failed setup stops the test instead of producing misleading failures
  // further down.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));

  EXPECT_EQ(100940, parser.ParseStartXRef());
  RetainPtr<CPDF_Object> cross_ref_stream_obj =
      parser.ParseIndirectObjectAtForTesting(100940);
  ASSERT_TRUE(cross_ref_stream_obj);
  EXPECT_EQ(75u, cross_ref_stream_obj->GetObjNum());
}
368
// Parses the linearized header of "linearized.pdf" after placing the file
// contents `kTestHeaderOffset` bytes into a larger buffer, then checks the
// trailer object number recorded in the cross-reference table.
TEST(ParserTest, ParseLinearizedWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file = PathService::GetTestFilePath("linearized.pdf");
  ASSERT_FALSE(test_file.empty());
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The vector is value-initialized, so the bytes in front of the file
  // contents are zero.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the init result like every other test in this file does, so a
  // failed setup stops the test instead of producing misleading failures
  // further down.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));
  EXPECT_TRUE(parser.ParseLinearizedHeader());

  const CPDF_CrossRefTable* cross_ref_table =
      parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
390
// For this input, whose startxref value is 6, parsing must report a format
// error and leave the cross-reference table without any object entries.
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
  const unsigned char kData[] =
      "%PDF1-7 0 obj <</Size 2 /W [0 0 0]\n>>\n"
      "stream\n"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "6\n"
      "%%EOF\n";

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser.StartParseInternal());

  const CPDF_CrossRefTable* table = parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->objects_info().size());
}
407
408 class ParserXRefTest : public testing::Test {
409 public:
410 ParserXRefTest() = default;
411 ~ParserXRefTest() override = default;
412
413 // testing::Test:
SetUp()414 void SetUp() override {
415 // Satisfy CPDF_Parser's checks, so the test data below can concentrate on
416 // the /XRef stream and avoid also providing other valid dictionaries.
417 dummy_root_ = pdfium::MakeRetain<CPDF_Dictionary>();
418 EXPECT_CALL(parser().object_holder(), ParseIndirectObject)
419 .WillRepeatedly(Return(dummy_root_));
420 }
421
parser()422 CPDF_TestParser& parser() { return parser_; }
423
424 private:
425 RetainPtr<CPDF_Dictionary> dummy_root_;
426 CPDF_TestParser parser_;
427 };
428
// An /Index that starts just below `kMaxObjectNumber` must not produce
// entries for object numbers past the limit.
TEST_F(ParserXRefTest, XrefObjectIndicesTooBig) {
  // Since /Index starts at 4194303, the object number will go past
  // `kMaxObjectNumber`.
  static_assert(CPDF_Parser::kMaxObjectNumber == 4194304,
                "Unexpected kMaxObjectNumber");
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Index [4194303 3]\n"
      " /Root 1 0 R\n"
      " /Size 4194306\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  // This should be the only object from table. Subsequent objects have object
  // numbers that are too big.
  const CPDF_CrossRefTable::ObjectInfo only_valid_object = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0};

  // TODO(thestig): Should the xref table contain object 4194305?
  // Consider reworking CPDF_Parser's object representation to avoid having to
  // store this placeholder object.
  const CPDF_CrossRefTable::ObjectInfo placeholder_object = {
      .type = CPDF_CrossRefTable::ObjectType::kFree, .pos = 0};

  EXPECT_THAT(objects_info, ElementsAre(Pair(4194303, only_valid_object),
                                        Pair(4194305, placeholder_object)));
}
473
// A compressed entry that names an invalid archive object is skipped, while
// the remaining entries of the xref stream are still recorded.
TEST_F(ParserXRefTest, XrefHasInvalidArchiveObjectNumber) {
  // 0xFF in the first object in the xref object stream is invalid.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  EXPECT_EQ(7u, cross_ref_table->trailer_object_number());
  const auto& objects_info = cross_ref_table->objects_info();

  // The expectation is for the parser to skip over the first object, and
  // continue parsing the remaining objects. So these are the second and third
  // objects.
  const CPDF_CrossRefTable::ObjectInfo expected_objects[2] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18}};

  EXPECT_THAT(objects_info, ElementsAre(Pair(1, expected_objects[0]),
                                        Pair(2, expected_objects[1])));
}
513
// Parsing must report a format error when startxref points at an object that
// is a plain dictionary rather than a stream.
TEST_F(ParserXRefTest, XrefHasInvalidObjectType) {
  // The XRef object is a dictionary and not a stream.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      ">>\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser().StartParseInternal());
}
532
// Parsing must report a format error when the xref stream dictionary carries
// a negative /Prev value.
TEST_F(ParserXRefTest, XrefHasInvalidPrevValue) {
  // The /Prev value is an absolute offset, so it should never be negative.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      " /Prev -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser().StartParseInternal());
}
557
// Parsing must report a format error when the xref stream dictionary carries
// a negative /Size value.
TEST_F(ParserXRefTest, XrefHasInvalidSizeValue) {
  // The /Size value should never be negative.
  // NOTE(review): the dictionary lists /Size twice (3, then -1). The
  // expectation below implies the negative value is the one acted upon -
  // confirm that the duplicate key is intentional.
  //
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1 1]\n"
      " /Size -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser().StartParseInternal());
}
582
// With a /W array that is too short, parsing still succeeds, but only by way
// of a rebuilt cross-reference table.
TEST_F(ParserXRefTest, XrefHasInvalidWidth) {
  // The /W array needs to have at least 3 values.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 3\n"
      " /W [1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));

  // StartParseInternal() succeeded not because XRef parsing succeeded, but
  // because RebuildCrossRef() got lucky with the data stream. Therefore, don't
  // bother checking the garbage output.
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_TRUE(parser().xref_table_rebuilt());
}
611
// With a zero-width type field in /W, every entry of the xref stream is
// recorded as a normal object.
TEST_F(ParserXRefTest, XrefFirstWidthEntryIsZero) {
  // When the first /W array entry is 0, it implies the objects are all of the
  // normal type.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 2\n"
      " /W [0 1 1]\n"
      ">>\n"
      "stream\n"
      "0F 00\n"
      "12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  const CPDF_CrossRefTable::ObjectInfo expected_result[2] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18}};

  EXPECT_THAT(objects_info, ElementsAre(Pair(0, expected_result[0]),
                                        Pair(1, expected_result[1])));
}
646
// A well-formed /Index with three subsections maps the stream's entries, in
// order, onto the listed object numbers.
TEST_F(ParserXRefTest, XrefWithValidIndex) {
  // The /Index specifies objects (2), (4, 5), (80, 81, 82).
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 83\n"
      " /Index [2 1 4 2 80 3]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "01 20 00\n"
      "01 22 00\n"
      "01 25 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  const CPDF_CrossRefTable::ObjectInfo expected_result[6] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 32},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 34},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 37}};

  EXPECT_THAT(
      objects_info,
      ElementsAre(Pair(2, expected_result[0]), Pair(4, expected_result[1]),
                  Pair(5, expected_result[2]), Pair(80, expected_result[3]),
                  Pair(81, expected_result[4]), Pair(82, expected_result[5])));
}
692
// When /Index subsections overlap, parsing still succeeds and the repeated
// object number ends up with a single entry.
TEST_F(ParserXRefTest, XrefIndexWithRepeatedObject) {
  // The /Index specifies objects (2, 3), (3). AKA the sub-sections overlap.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 4\n"
      " /Index [2 2 3 1]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  const CPDF_CrossRefTable::ObjectInfo expected_result[2] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0},
      // Since the /Index does not follow the spec, this is one of the 2
      // possible values that a parser can come up with.
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18}};

  EXPECT_THAT(objects_info, ElementsAre(Pair(2, expected_result[0]),
                                        Pair(3, expected_result[1])));
}
730
// /Index subsections listed out of ascending order are accepted, and each
// stream entry is assigned to the object number its subsection names.
TEST_F(ParserXRefTest, XrefIndexWithOutOfOrderObjects) {
  // The /Index specifies objects (3, 4), (2), which is not in ascending order.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 5\n"
      " /Index [3 2 2 1]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  // Although the /Index does not follow the spec, the parser tolerates it.
  const CPDF_CrossRefTable::ObjectInfo expected_result[3] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15}};

  EXPECT_THAT(objects_info, ElementsAre(Pair(2, expected_result[0]),
                                        Pair(3, expected_result[1]),
                                        Pair(4, expected_result[2])));
}
769
// A /Size that is one too small for the objects named by /Index does not
// prevent those objects from being recorded.
TEST_F(ParserXRefTest, XrefWithIndexAndWrongSize) {
  // The /Index specifies objects (2), (80, 81), so the /Size should be 82,
  // but is actually 81.
  // `static` because the fixture-owned parser outlives this function's locals
  // and keeps a view of the buffer (presumably non-owning, via
  // CFX_ReadOnlySpanStream) until the fixture is destroyed.
  static const unsigned char kData[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      " /Filter /ASCIIHexDecode\n"
      " /Root 1 0 R\n"
      " /Size 81\n"
      " /Index [2 1 80 2]\n"
      " /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kData));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* cross_ref_table =
      parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  const auto& objects_info = cross_ref_table->objects_info();

  const CPDF_CrossRefTable::ObjectInfo expected_result[3] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18}};

  EXPECT_THAT(objects_info, ElementsAre(Pair(2, expected_result[0]),
                                        Pair(80, expected_result[1]),
                                        Pair(81, expected_result[2])));
}
808