• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/parser/cpdf_parser.h"
6 
7 #include <array>
8 #include <limits>
9 #include <memory>
10 #include <ostream>
11 #include <string>
12 #include <utility>
13 #include <vector>
14 
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
17 #include "core/fpdfapi/parser/cpdf_object.h"
18 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
19 #include "core/fxcrt/cfx_read_only_span_stream.h"
20 #include "core/fxcrt/fx_extension.h"
21 #include "core/fxcrt/fx_stream.h"
22 #include "core/fxcrt/retain_ptr.h"
23 #include "core/fxcrt/stl_util.h"
24 #include "testing/gmock/include/gmock/gmock.h"
25 #include "testing/gtest/include/gtest/gtest.h"
26 #include "testing/utils/path_service.h"
27 
28 using testing::ElementsAre;
29 using testing::Pair;
30 using testing::Return;
31 
32 namespace {
33 
34 struct OffsetAndType {
35   FX_FILESIZE offset;
36   CPDF_CrossRefTable::ObjectType type;
37 };
38 
GetObjInfo(const CPDF_Parser & parser,uint32_t obj_num)39 CPDF_CrossRefTable::ObjectInfo GetObjInfo(const CPDF_Parser& parser,
40                                           uint32_t obj_num) {
41   const auto* info =
42       parser.GetCrossRefTableForTesting()->GetObjectInfo(obj_num);
43   return info ? *info : CPDF_CrossRefTable::ObjectInfo();
44 }
45 
46 class TestObjectsHolder final : public CPDF_Parser::ParsedObjectsHolder {
47  public:
48   TestObjectsHolder() = default;
49   ~TestObjectsHolder() override = default;
50 
51   // CPDF_Parser::ParsedObjectsHolder:
TryInit()52   bool TryInit() override { return true; }
53   MOCK_METHOD(RetainPtr<CPDF_Object>, ParseIndirectObject, (uint32_t objnum));
54 };
55 
56 }  // namespace
57 
58 // Test-only helper to support Gmock. Cannot be in an anonymous namespace.
operator ==(const CPDF_CrossRefTable::ObjectInfo & lhs,const CPDF_CrossRefTable::ObjectInfo & rhs)59 bool operator==(const CPDF_CrossRefTable::ObjectInfo& lhs,
60                 const CPDF_CrossRefTable::ObjectInfo& rhs) {
61   if (lhs.type != rhs.type) {
62     return false;
63   }
64 
65   if (lhs.gennum != rhs.gennum) {
66     return false;
67   }
68 
69   switch (lhs.type) {
70     case CPDF_CrossRefTable::ObjectType::kFree:
71       return true;
72     case CPDF_CrossRefTable::ObjectType::kNormal:
73       return lhs.pos == rhs.pos;
74     case CPDF_CrossRefTable::ObjectType::kCompressed:
75       return lhs.archive.obj_num == rhs.archive.obj_num &&
76              lhs.archive.obj_index == rhs.archive.obj_index;
77   }
78 }
79 
80 // Test-only helper to let Gmock pretty-print `info`. Cannot be in an anonymous
81 // namespace.
operator <<(std::ostream & os,const CPDF_CrossRefTable::ObjectInfo & info)82 std::ostream& operator<<(std::ostream& os,
83                          const CPDF_CrossRefTable::ObjectInfo& info) {
84   os << "(";
85   switch (info.type) {
86     case CPDF_CrossRefTable::ObjectType::kFree:
87       os << "Free object";
88       break;
89     case CPDF_CrossRefTable::ObjectType::kNormal:
90       os << "Normal object, pos: " << info.pos
91          << ", obj_stream=" << info.is_object_stream_flag;
92       break;
93     case CPDF_CrossRefTable::ObjectType::kCompressed:
94       os << "Compressed object, archive obj_num: " << info.archive.obj_num
95          << ", archive obj_index: " << info.archive.obj_index;
96       break;
97   }
98   os << ", gennum: " << info.gennum << ")";
99   return os;
100 }
101 
102 // A wrapper class to help test member functions of CPDF_Parser.
103 class CPDF_TestParser final : public CPDF_Parser {
104  public:
CPDF_TestParser()105   CPDF_TestParser() : CPDF_Parser(&object_holder_) {}
106   ~CPDF_TestParser() = default;
107 
108   // Setup reading from a file and initial states.
InitTestFromFile(const char * path)109   bool InitTestFromFile(const char* path) {
110     RetainPtr<IFX_SeekableReadStream> pFileAccess =
111         IFX_SeekableReadStream::CreateFromFilename(path);
112     if (!pFileAccess)
113       return false;
114 
115     // For the test file, the header is set at the beginning.
116     SetSyntaxParserForTesting(
117         std::make_unique<CPDF_SyntaxParser>(std::move(pFileAccess)));
118     return true;
119   }
120 
121   // Setup reading from a buffer and initial states.
InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,FX_FILESIZE header_offset)122   bool InitTestFromBufferWithOffset(pdfium::span<const uint8_t> buffer,
123                                     FX_FILESIZE header_offset) {
124     SetSyntaxParserForTesting(CPDF_SyntaxParser::CreateForTesting(
125         pdfium::MakeRetain<CFX_ReadOnlySpanStream>(buffer), header_offset));
126     return true;
127   }
128 
InitTestFromBuffer(pdfium::span<const uint8_t> buffer)129   bool InitTestFromBuffer(pdfium::span<const uint8_t> buffer) {
130     return InitTestFromBufferWithOffset(buffer, 0 /*header_offset*/);
131   }
132 
133   // Expose protected CPDF_Parser methods for testing.
134   using CPDF_Parser::LoadCrossRefTable;
135   using CPDF_Parser::ParseLinearizedHeader;
136   using CPDF_Parser::ParseStartXRef;
137   using CPDF_Parser::RebuildCrossRef;
138   using CPDF_Parser::StartParseInternal;
139 
object_holder()140   TestObjectsHolder& object_holder() { return object_holder_; }
141 
142  private:
143   TestObjectsHolder object_holder_;
144 };
145 
// Rebuilding the cross-reference table of parser_rebuildxref_correct.pdf must
// succeed and yield the expected position and generation number for objects
// 0 through 6, with no trailer object number recorded.
TEST(ParserTest, RebuildCrossRefCorrectly) {
  CPDF_TestParser parser;
  const std::string pdf_path =
      PathService::GetTestFilePath("parser_rebuildxref_correct.pdf");
  ASSERT_FALSE(pdf_path.empty());
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_TRUE(parser.RebuildCrossRef());

  // Both tables are indexed by object number.
  constexpr std::array<FX_FILESIZE, 7> kExpectedOffsets = {
      {0, 15, 61, 154, 296, 374, 450}};
  constexpr std::array<uint16_t, 7> kExpectedGenNums = {{0, 0, 2, 4, 6, 8, 0}};

  for (size_t obj_num = 0; obj_num < kExpectedOffsets.size(); ++obj_num) {
    EXPECT_EQ(kExpectedOffsets[obj_num], GetObjInfo(parser, obj_num).pos);
  }
  for (size_t obj_num = 0; obj_num < kExpectedGenNums.size(); ++obj_num) {
    EXPECT_EQ(kExpectedGenNums[obj_num], GetObjInfo(parser, obj_num).gennum);
  }

  const CPDF_CrossRefTable* table = parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->trailer_object_number());
}
169 
// RebuildCrossRef() must report failure for
// parser_rebuildxref_error_notrailer.pdf.
TEST(ParserTest, RebuildCrossRefFailed) {
  CPDF_TestParser parser;
  const std::string pdf_path =
      PathService::GetTestFilePath("parser_rebuildxref_error_notrailer.pdf");
  ASSERT_FALSE(pdf_path.empty());
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_FALSE(parser.RebuildCrossRef());
}
179 
TEST(ParserTest,LoadCrossRefTable)180 TEST(ParserTest, LoadCrossRefTable) {
181   {
182     static const unsigned char kXrefTable[] =
183         "xref \n"
184         "0 6 \n"
185         "0000000003 65535 f \n"
186         "0000000017 00000 n \n"
187         "0000000081 00000 n \n"
188         "0000000000 00007 f \n"
189         "0000000331 00000 n \n"
190         "0000000409 00000 n \n"
191         "trail";  // Needed to end cross ref table reading.
192     static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
193         {0, CPDF_CrossRefTable::ObjectType::kFree},
194         {17, CPDF_CrossRefTable::ObjectType::kNormal},
195         {81, CPDF_CrossRefTable::ObjectType::kNormal},
196         {0, CPDF_CrossRefTable::ObjectType::kFree},
197         {331, CPDF_CrossRefTable::ObjectType::kNormal},
198         {409, CPDF_CrossRefTable::ObjectType::kNormal},
199     });
200     CPDF_TestParser parser;
201     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
202     ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
203     for (size_t i = 0; i < std::size(kExpected); ++i) {
204       EXPECT_EQ(kExpected[i].offset, GetObjInfo(parser, i).pos);
205       EXPECT_EQ(kExpected[i].type, GetObjInfo(parser, i).type);
206     }
207   }
208   {
209     static const unsigned char kXrefTable[] =
210         "xref \n"
211         "0 1 \n"
212         "0000000000 65535 f \n"
213         "3 1 \n"
214         "0000025325 00000 n \n"
215         "8 2 \n"
216         "0000025518 00002 n \n"
217         "0000025635 00000 n \n"
218         "12 1 \n"
219         "0000025777 00000 n \n"
220         "trail";  // Needed to end cross ref table reading.
221     static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
222         {0, CPDF_CrossRefTable::ObjectType::kFree},
223         {0, CPDF_CrossRefTable::ObjectType::kFree},
224         {0, CPDF_CrossRefTable::ObjectType::kFree},
225         {25325, CPDF_CrossRefTable::ObjectType::kNormal},
226         {0, CPDF_CrossRefTable::ObjectType::kFree},
227         {0, CPDF_CrossRefTable::ObjectType::kFree},
228         {0, CPDF_CrossRefTable::ObjectType::kFree},
229         {0, CPDF_CrossRefTable::ObjectType::kFree},
230         {25518, CPDF_CrossRefTable::ObjectType::kNormal},
231         {25635, CPDF_CrossRefTable::ObjectType::kNormal},
232         {0, CPDF_CrossRefTable::ObjectType::kFree},
233         {0, CPDF_CrossRefTable::ObjectType::kFree},
234         {25777, CPDF_CrossRefTable::ObjectType::kNormal},
235     });
236     CPDF_TestParser parser;
237     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
238     ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
239     for (size_t i = 0; i < std::size(kExpected); ++i) {
240       EXPECT_EQ(kExpected[i].offset, GetObjInfo(parser, i).pos);
241       EXPECT_EQ(kExpected[i].type, GetObjInfo(parser, i).type);
242     }
243   }
244   {
245     static const unsigned char kXrefTable[] =
246         "xref \n"
247         "0 1 \n"
248         "0000000000 65535 f \n"
249         "3 1 \n"
250         "0000025325 00000 n \n"
251         "8 2 \n"
252         "0000000000 65535 f \n"
253         "0000025635 00000 n \n"
254         "12 1 \n"
255         "0000025777 00000 n \n"
256         "trail";  // Needed to end cross ref table reading.
257     static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
258         {0, CPDF_CrossRefTable::ObjectType::kFree},
259         {0, CPDF_CrossRefTable::ObjectType::kFree},
260         {0, CPDF_CrossRefTable::ObjectType::kFree},
261         {25325, CPDF_CrossRefTable::ObjectType::kNormal},
262         {0, CPDF_CrossRefTable::ObjectType::kFree},
263         {0, CPDF_CrossRefTable::ObjectType::kFree},
264         {0, CPDF_CrossRefTable::ObjectType::kFree},
265         {0, CPDF_CrossRefTable::ObjectType::kFree},
266         {0, CPDF_CrossRefTable::ObjectType::kFree},
267         {25635, CPDF_CrossRefTable::ObjectType::kNormal},
268         {0, CPDF_CrossRefTable::ObjectType::kFree},
269         {0, CPDF_CrossRefTable::ObjectType::kFree},
270         {25777, CPDF_CrossRefTable::ObjectType::kNormal},
271     });
272     CPDF_TestParser parser;
273     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
274     ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
275     for (size_t i = 0; i < std::size(kExpected); ++i) {
276       EXPECT_EQ(kExpected[i].offset, GetObjInfo(parser, i).pos);
277       EXPECT_EQ(kExpected[i].type, GetObjInfo(parser, i).type);
278     }
279   }
280   {
281     static const unsigned char kXrefTable[] =
282         "xref \n"
283         "0 7 \n"
284         "0000000002 65535 f \n"
285         "0000000023 00000 n \n"
286         "0000000003 65535 f \n"
287         "0000000004 65535 f \n"
288         "0000000000 65535 f \n"
289         "0000000045 00000 n \n"
290         "0000000179 00000 n \n"
291         "trail";  // Needed to end cross ref table reading.
292     static constexpr auto kExpected = fxcrt::ToArray<OffsetAndType>({
293         {0, CPDF_CrossRefTable::ObjectType::kFree},
294         {23, CPDF_CrossRefTable::ObjectType::kNormal},
295         {0, CPDF_CrossRefTable::ObjectType::kFree},
296         {0, CPDF_CrossRefTable::ObjectType::kFree},
297         {0, CPDF_CrossRefTable::ObjectType::kFree},
298         {45, CPDF_CrossRefTable::ObjectType::kNormal},
299         {179, CPDF_CrossRefTable::ObjectType::kNormal},
300     });
301     CPDF_TestParser parser;
302     ASSERT_TRUE(parser.InitTestFromBuffer(kXrefTable));
303     ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
304     for (size_t i = 0; i < std::size(kExpected); ++i) {
305       EXPECT_EQ(kExpected[i].offset, GetObjInfo(parser, i).pos);
306       EXPECT_EQ(kExpected[i].type, GetObjInfo(parser, i).type);
307     }
308   }
309   {
310     // Regression test for https://crbug.com/945624 - Make sure the parser
311     // can correctly handle table sizes that are multiples of the read size,
312     // which is 1024.
313     std::string xref_table = "xref \n 0 2048 \n";
314     xref_table.reserve(41000);
315     for (int i = 0; i < 2048; ++i) {
316       char buffer[21];
317       snprintf(buffer, sizeof(buffer), "%010d 00000 n \n", i + 1);
318       xref_table += buffer;
319     }
320     xref_table += "trail";  // Needed to end cross ref table reading.
321     CPDF_TestParser parser;
322     ASSERT_TRUE(parser.InitTestFromBuffer(
323         ByteStringView(xref_table.c_str()).unsigned_span()));
324     ASSERT_TRUE(parser.LoadCrossRefTable(/*pos=*/0, /*skip=*/false));
325     for (size_t i = 0; i < 2048; ++i) {
326       EXPECT_EQ(static_cast<int>(i) + 1, GetObjInfo(parser, i).pos);
327       EXPECT_EQ(CPDF_CrossRefTable::ObjectType::kNormal,
328                 GetObjInfo(parser, i).type);
329     }
330   }
331 }
332 
// ParseStartXRef() must locate the cross-reference offset of
// annotation_stamp_with_ap.pdf, and the indirect object parsed at that offset
// must be object 75.
TEST(ParserTest, ParseStartXRef) {
  constexpr FX_FILESIZE kExpectedXRefOffset = 100940;

  CPDF_TestParser parser;
  const std::string pdf_path =
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf");
  ASSERT_FALSE(pdf_path.empty());
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  EXPECT_EQ(kExpectedXRefOffset, parser.ParseStartXRef());

  RetainPtr<CPDF_Object> xref_object =
      parser.ParseIndirectObjectAtForTesting(kExpectedXRefOffset);
  ASSERT_TRUE(xref_object);
  EXPECT_EQ(75u, xref_object->GetObjNum());
}
346 
// Same expectations as ParseStartXRef, but the file contents are placed
// `kTestHeaderOffset` bytes into a larger buffer and the parser is told about
// that offset. The reported xref offset and parsed object must be unchanged.
TEST(ParserTest, ParseStartXRefWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file =
      PathService::GetTestFilePath("annotation_stamp_with_ap.pdf");
  ASSERT_FALSE(test_file.empty());
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The first `kTestHeaderOffset` bytes stay value-initialized (zero); the
  // file is read into the remainder of the buffer.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));
  CPDF_TestParser parser;
  // Check the setup result like the other tests in this file do, so a failed
  // setup stops the test instead of surfacing as a confusing parse failure.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));

  EXPECT_EQ(100940, parser.ParseStartXRef());
  RetainPtr<CPDF_Object> cross_ref_stream_obj =
      parser.ParseIndirectObjectAtForTesting(100940);
  ASSERT_TRUE(cross_ref_stream_obj);
  EXPECT_EQ(75u, cross_ref_stream_obj->GetObjNum());
}
368 
// Places linearized.pdf `kTestHeaderOffset` bytes into a larger buffer and
// checks that ParseLinearizedHeader() still succeeds, leaving a cross-reference
// table whose trailer object number is 0.
TEST(ParserTest, ParseLinearizedWithHeaderOffset) {
  static constexpr FX_FILESIZE kTestHeaderOffset = 765;
  std::string test_file = PathService::GetTestFilePath("linearized.pdf");
  ASSERT_FALSE(test_file.empty());
  RetainPtr<IFX_SeekableReadStream> pFileAccess =
      IFX_SeekableReadStream::CreateFromFilename(test_file.c_str());
  ASSERT_TRUE(pFileAccess);

  // The first `kTestHeaderOffset` bytes stay value-initialized (zero); the
  // file is read into the remainder of the buffer.
  std::vector<unsigned char> data(pFileAccess->GetSize() + kTestHeaderOffset);
  ASSERT_TRUE(pFileAccess->ReadBlockAtOffset(
      pdfium::make_span(data).subspan(kTestHeaderOffset), 0));

  CPDF_TestParser parser;
  // Check the setup result like the other tests in this file do, so a failed
  // setup stops the test instead of surfacing as a confusing parse failure.
  ASSERT_TRUE(parser.InitTestFromBufferWithOffset(data, kTestHeaderOffset));
  EXPECT_TRUE(parser.ParseLinearizedHeader());

  const CPDF_CrossRefTable* cross_ref_table =
      parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(cross_ref_table);
  EXPECT_EQ(0u, cross_ref_table->trailer_object_number());
}
390 
// With this malformed input, StartParseInternal() must fail with
// FORMAT_ERROR and leave the cross-reference table without any object entries.
TEST(ParserTest, BadStartXrefShouldNotBuildCrossRefTable) {
  const unsigned char kMalformedPdf[] =
      "%PDF1-7 0 obj <</Size 2 /W [0 0 0]\n>>\n"
      "stream\n"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "6\n"
      "%%EOF\n";

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromBuffer(kMalformedPdf));
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, parser.StartParseInternal());

  const CPDF_CrossRefTable* table = parser.GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  EXPECT_EQ(0u, table->objects_info().size());
}
407 
408 class ParserXRefTest : public testing::Test {
409  public:
410   ParserXRefTest() = default;
411   ~ParserXRefTest() override = default;
412 
413   // testing::Test:
SetUp()414   void SetUp() override {
415     // Satisfy CPDF_Parser's checks, so the test data below can concentrate on
416     // the /XRef stream and avoid also providing other valid dictionaries.
417     dummy_root_ = pdfium::MakeRetain<CPDF_Dictionary>();
418     EXPECT_CALL(parser().object_holder(), ParseIndirectObject)
419         .WillRepeatedly(Return(dummy_root_));
420   }
421 
parser()422   CPDF_TestParser& parser() { return parser_; }
423 
424  private:
425   RetainPtr<CPDF_Dictionary> dummy_root_;
426   CPDF_TestParser parser_;
427 };
428 
// An /Index that runs past the maximum object number: parsing still succeeds
// without a rebuild, and the oversized object numbers are not stored as normal
// entries.
TEST_F(ParserXRefTest, XrefObjectIndicesTooBig) {
  // Since /Index starts at 4194303, the object number will go past
  // `kMaxObjectNumber`.
  static_assert(CPDF_Parser::kMaxObjectNumber == 4194304,
                "Unexpected kMaxObjectNumber");
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Index [4194303 3]\n"
      "  /Root 1 0 R\n"
      "  /Size 4194306\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  // This should be the only object from table. Subsequent objects have object
  // numbers that are too big.
  CPDF_CrossRefTable::ObjectInfo valid_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0};

  // TODO(thestig): Should the xref table contain object 4194305?
  // Consider reworking CPDF_Parser's object representation to avoid having to
  // store this placeholder object.
  CPDF_CrossRefTable::ObjectInfo placeholder_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kFree, .pos = 0};

  EXPECT_THAT(entries, ElementsAre(Pair(4194303, valid_entry),
                                   Pair(4194305, placeholder_entry)));
}
473 
// A compressed-object row that names an invalid archive object number is
// skipped, while the rows after it are still loaded.
TEST_F(ParserXRefTest, XrefHasInvalidArchiveObjectNumber) {
  // 0xFF in the first object in the xref object stream is invalid.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";
  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  EXPECT_EQ(7u, table->trailer_object_number());
  const auto& entries = table->objects_info();

  // The expectation is for the parser to skip over the first object, and
  // continue parsing the remaining objects. So these are the second and third
  // objects.
  CPDF_CrossRefTable::ObjectInfo second_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15};
  CPDF_CrossRefTable::ObjectInfo third_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18};

  EXPECT_THAT(entries,
              ElementsAre(Pair(1, second_entry), Pair(2, third_entry)));
}
513 
// When startxref points at a plain dictionary rather than a stream, parsing
// must fail with FORMAT_ERROR.
TEST_F(ParserXRefTest, XrefHasInvalidObjectType) {
  // The XRef object is a dictionary and not a stream.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
532 
// A negative /Prev entry in the xref stream dictionary must make parsing fail
// with FORMAT_ERROR.
TEST_F(ParserXRefTest, XrefHasInvalidPrevValue) {
  // The /Prev value is an absolute offset, so it should never be negative.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      "  /Prev -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
557 
// A negative /Size entry in the xref stream dictionary must make parsing fail
// with FORMAT_ERROR.
TEST_F(ParserXRefTest, XrefHasInvalidSizeValue) {
  // The /Size value should never be negative.
  // Note that the dictionary below lists /Size twice (3, then -1); the test
  // expects the parse to be rejected.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1 1]\n"
      "  /Size -1\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::FORMAT_ERROR, result);
}
582 
// A /W array with only two entries: the overall parse still succeeds, but only
// because the cross-reference table gets rebuilt.
TEST_F(ParserXRefTest, XrefHasInvalidWidth) {
  // The /W array needs to have at least 3 values.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 3\n"
      "  /W [1 1]\n"
      ">>\n"
      "stream\n"
      "02 FF 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));

  // StartParseInternal() succeeded not because XRef parsing succeeded, but
  // because RebuildCrossRef() got lucky with the data stream. Therefore, don't
  // bother checking the garbage output.
  const auto result = parser().StartParseInternal();
  EXPECT_EQ(CPDF_Parser::SUCCESS, result);
  EXPECT_TRUE(parser().xref_table_rebuilt());
}
611 
// With /W [0 1 1] the rows carry no type field; both rows must be loaded as
// normal objects at the offsets given in the stream.
TEST_F(ParserXRefTest, XrefFirstWidthEntryIsZero) {
  // When the first /W array entry is 0, it implies the objects are all of the
  // normal type.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 2\n"
      "  /W [0 1 1]\n"
      ">>\n"
      "stream\n"
      "0F 00\n"
      "12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  // 0x0F == 15 and 0x12 == 18.
  CPDF_CrossRefTable::ObjectInfo first_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15};
  CPDF_CrossRefTable::ObjectInfo second_entry = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18};

  EXPECT_THAT(entries,
              ElementsAre(Pair(0, first_entry), Pair(1, second_entry)));
}
646 
// A well-formed /Index with three subsections: each stream row must be
// assigned to the object number its subsection dictates.
TEST_F(ParserXRefTest, XrefWithValidIndex) {
  // The /Index specifies objects (2), (4, 5), (80, 81, 82).
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 83\n"
      "  /Index [2 1 4 2 80 3]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "01 20 00\n"
      "01 22 00\n"
      "01 25 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  // One expected entry per stream row, in stream order.
  CPDF_CrossRefTable::ObjectInfo wanted[6] = {
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 32},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 34},
      {.type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 37}};

  EXPECT_THAT(entries,
              ElementsAre(Pair(2, wanted[0]), Pair(4, wanted[1]),
                          Pair(5, wanted[2]), Pair(80, wanted[3]),
                          Pair(81, wanted[4]), Pair(82, wanted[5])));
}
692 
// Overlapping /Index subsections: object 3 is listed twice, and the test pins
// which of the two rows ends up in the table.
TEST_F(ParserXRefTest, XrefIndexWithRepeatedObject) {
  // The /Index specifies objects (2, 3), (3). AKA the sub-sections overlap.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 4\n"
      "  /Index [2 2 3 1]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  CPDF_CrossRefTable::ObjectInfo object_2 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0};
  // Since the /Index does not follow the spec, this is one of the 2
  // possible values that a parser can come up with.
  CPDF_CrossRefTable::ObjectInfo object_3 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18};

  EXPECT_THAT(entries, ElementsAre(Pair(2, object_2), Pair(3, object_3)));
}
730 
// /Index subsections given in descending order are accepted; every row must
// still land on the object number its subsection names.
TEST_F(ParserXRefTest, XrefIndexWithOutOfOrderObjects) {
  // The /Index specifies objects (3, 4), (2), which is not in ascending order.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 5\n"
      "  /Index [3 2 2 1]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  // Although the /Index does not follow the spec, the parser tolerates it.
  // Expectations are listed by ascending object number, so the third stream
  // row (object 2) comes first.
  CPDF_CrossRefTable::ObjectInfo object_2 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18};
  CPDF_CrossRefTable::ObjectInfo object_3 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0};
  CPDF_CrossRefTable::ObjectInfo object_4 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15};

  EXPECT_THAT(entries, ElementsAre(Pair(2, object_2), Pair(3, object_3),
                                   Pair(4, object_4)));
}
769 
// A /Size that is one too small for the objects named by /Index: all three
// rows, including object 81, must still be loaded.
TEST_F(ParserXRefTest, XrefWithIndexAndWrongSize) {
  // The /Index specifies objects (2), (80, 81), so the /Size should be 82,
  // but is actually 81.
  const unsigned char kPdf[] =
      "%PDF1-7\n%\xa0\xf2\xa4\xf4\n"
      "7 0 obj <<\n"
      "  /Filter /ASCIIHexDecode\n"
      "  /Root 1 0 R\n"
      "  /Size 81\n"
      "  /Index [2 1 80 2]\n"
      "  /W [1 1 1]\n"
      ">>\n"
      "stream\n"
      "01 00 00\n"
      "01 0F 00\n"
      "01 12 00\n"
      "endstream\n"
      "endobj\n"
      "startxref\n"
      "14\n"
      "%%EOF\n";

  ASSERT_TRUE(parser().InitTestFromBuffer(kPdf));
  EXPECT_EQ(CPDF_Parser::SUCCESS, parser().StartParseInternal());
  EXPECT_FALSE(parser().xref_table_rebuilt());

  const CPDF_CrossRefTable* table = parser().GetCrossRefTableForTesting();
  ASSERT_TRUE(table);
  const auto& entries = table->objects_info();

  const CPDF_CrossRefTable::ObjectInfo object_2 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 0};
  const CPDF_CrossRefTable::ObjectInfo object_80 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 15};
  const CPDF_CrossRefTable::ObjectInfo object_81 = {
      .type = CPDF_CrossRefTable::ObjectType::kNormal, .pos = 18};

  EXPECT_THAT(entries, ElementsAre(Pair(2, object_2), Pair(80, object_80),
                                   Pair(81, object_81)));
}
808