1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
6
7 #include <memory>
8 #include <string>
9 #include <utility>
10
11 #include "core/fpdfapi/page/cpdf_pagemodule.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
15 #include "core/fpdfapi/parser/cpdf_object.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
19 #include "core/fxcrt/cfx_readonlymemorystream.h"
20 #include "core/fxcrt/fx_stream.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "testing/utils/path_service.h"
24 #include "third_party/base/ptr_util.h"
25
26 namespace {
27
MakeValidatorFromFile(const std::string & file_name)28 RetainPtr<CPDF_ReadValidator> MakeValidatorFromFile(
29 const std::string& file_name) {
30 std::string file_path;
31 PathService::GetTestFilePath(file_name, &file_path);
32 ASSERT(!file_path.empty());
33 return pdfium::MakeRetain<CPDF_ReadValidator>(
34 IFX_SeekableReadStream::CreateFromFilename(file_path.c_str()), nullptr);
35 }
36
MakeDataAvailFromFile(const std::string & file_name)37 std::unique_ptr<CPDF_DataAvail> MakeDataAvailFromFile(
38 const std::string& file_name) {
39 return pdfium::MakeUnique<CPDF_DataAvail>(
40 nullptr, MakeValidatorFromFile(file_name), true);
41 }
42
43 class TestLinearizedHeader final : public CPDF_LinearizedHeader {
44 public:
TestLinearizedHeader(const CPDF_Dictionary * pDict,FX_FILESIZE szLastXRefOffset)45 TestLinearizedHeader(const CPDF_Dictionary* pDict,
46 FX_FILESIZE szLastXRefOffset)
47 : CPDF_LinearizedHeader(pDict, szLastXRefOffset) {}
48
MakeHeader(const std::string & inline_data)49 static std::unique_ptr<CPDF_LinearizedHeader> MakeHeader(
50 const std::string& inline_data) {
51 CPDF_SyntaxParser parser(pdfium::MakeRetain<CFX_ReadOnlyMemoryStream>(
52 pdfium::as_bytes(pdfium::make_span(inline_data))));
53 RetainPtr<CPDF_Dictionary> dict =
54 ToDictionary(parser.GetObjectBody(nullptr));
55 ASSERT(dict);
56 return pdfium::MakeUnique<TestLinearizedHeader>(dict.Get(), 0);
57 }
58 };
59
60 } // namespace
61
62 class CPDF_HintTablesTest : public testing::Test {
63 public:
CPDF_HintTablesTest()64 CPDF_HintTablesTest() {
65 // Needs for encoding Hint table stream.
66 CPDF_PageModule::Create();
67 }
68
~CPDF_HintTablesTest()69 ~CPDF_HintTablesTest() override { CPDF_PageModule::Destroy(); }
70 };
71
// Loads feature_linearized_loading.pdf and checks the position, length and
// object number that the hint tables report for each page index.
TEST_F(CPDF_HintTablesTest, Load) {
  auto data_avail = MakeDataAvailFromFile("feature_linearized_loading.pdf");
  ASSERT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable,
            data_avail->IsDocAvail(nullptr));

  // Everything below dereferences the hint tables, so stop if they are absent.
  const CPDF_HintTables* hint_tables = data_avail->GetHintTables();
  ASSERT_TRUE(hint_tables);

  // Output slots reused for every GetPagePos() call.
  FX_FILESIZE offset = 0;
  FX_FILESIZE length = 0;
  uint32_t obj_num = 0;

  // Page index 0.
  ASSERT_TRUE(hint_tables->GetPagePos(0, &offset, &length, &obj_num));
  EXPECT_EQ(777, offset);
  EXPECT_EQ(4328, length);
  EXPECT_EQ(39u, obj_num);

  // Page index 1.
  ASSERT_TRUE(hint_tables->GetPagePos(1, &offset, &length, &obj_num));
  EXPECT_EQ(5105, offset);
  EXPECT_EQ(767, length);
  EXPECT_EQ(1u, obj_num);

  // Page index 2 is expected to be rejected.
  ASSERT_FALSE(hint_tables->GetPagePos(2, &offset, &length, &obj_num));
}
99
// Loads feature_linearized_loading.pdf and checks every field of the page
// infos and shared object group infos exposed by the hint tables.
TEST_F(CPDF_HintTablesTest, PageAndGroupInfos) {
  auto data_avail = MakeDataAvailFromFile("feature_linearized_loading.pdf");
  ASSERT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable,
            data_avail->IsDocAvail(nullptr));

  const CPDF_HintTables* hint_tables = data_avail->GetHintTables();
  ASSERT_TRUE(hint_tables);

  // PageInfo
  const auto& pages = hint_tables->PageInfos();
  ASSERT_EQ(2u, pages.size());

  {
    const auto& page = pages[0];
    EXPECT_EQ(5u, page.objects_count());
    EXPECT_EQ(777, page.page_offset());
    EXPECT_EQ(4328u, page.page_length());
    EXPECT_EQ(39u, page.start_obj_num());

    // The size check is fatal so that the indexing below stays in bounds.
    const auto& identifiers = page.Identifiers();
    ASSERT_EQ(2u, identifiers.size());
    EXPECT_EQ(0u, identifiers[0]);
    EXPECT_EQ(0u, identifiers[1]);
  }

  {
    const auto& page = pages[1];
    EXPECT_EQ(3u, page.objects_count());
    EXPECT_EQ(5105, page.page_offset());
    EXPECT_EQ(767u, page.page_length());
    EXPECT_EQ(1u, page.start_obj_num());

    const auto& identifiers = page.Identifiers();
    ASSERT_EQ(3u, identifiers.size());
    EXPECT_EQ(2u, identifiers[0]);
    EXPECT_EQ(5u, identifiers[1]);
    EXPECT_EQ(3u, identifiers[2]);
  }

  // SharedGroupInfo
  const auto& groups = hint_tables->SharedGroupInfos();
  ASSERT_EQ(6u, groups.size());

  {
    const auto& group = groups[0];
    EXPECT_EQ(777, group.m_szOffset);
    EXPECT_EQ(254u, group.m_dwLength);
    EXPECT_EQ(39u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }

  {
    const auto& group = groups[1];
    EXPECT_EQ(1031, group.m_szOffset);
    EXPECT_EQ(389u, group.m_dwLength);
    EXPECT_EQ(40u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }

  {
    const auto& group = groups[2];
    EXPECT_EQ(1420, group.m_szOffset);
    EXPECT_EQ(726u, group.m_dwLength);
    EXPECT_EQ(41u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }

  {
    const auto& group = groups[3];
    EXPECT_EQ(2146, group.m_szOffset);
    EXPECT_EQ(290u, group.m_dwLength);
    EXPECT_EQ(42u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }

  {
    const auto& group = groups[4];
    EXPECT_EQ(2436, group.m_szOffset);
    EXPECT_EQ(2669u, group.m_dwLength);
    EXPECT_EQ(43u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }

  {
    const auto& group = groups[5];
    EXPECT_EQ(10939, group.m_szOffset);
    EXPECT_EQ(544u, group.m_dwLength);
    EXPECT_EQ(4u, group.m_dwStartObjNum);
    EXPECT_EQ(1u, group.m_dwObjectsCount);
  }
}
161
// Checks that a valid hint table stream loads and that the offset of the
// first page object computed from it is correct.
TEST_F(CPDF_HintTablesTest, FirstPageOffset) {
  // Linearization dictionary used together with the hint table loaded below.
  const std::unique_ptr<CPDF_LinearizedHeader> header =
      TestLinearizedHeader::MakeHeader(
          "<< /Linearized 1 /L 19326762 /H [ 123730 3816 ] /O 5932 /E 639518 /N "
          "102 /T 19220281 >>");
  ASSERT_TRUE(header);

  // The hint table was extracted from a linearized file generated by the
  // qpdf tool.
  RetainPtr<CPDF_ReadValidator> file_validator =
      MakeValidatorFromFile("hint_table_102p.bin");
  CPDF_SyntaxParser syntax_parser(file_validator, 0);
  RetainPtr<CPDF_Stream> hint_stream =
      ToStream(syntax_parser.GetObjectBody(nullptr));
  ASSERT_TRUE(hint_stream);

  auto tables =
      pdfium::MakeUnique<CPDF_HintTables>(file_validator.Get(), header.get());

  // The hint stream has to load successfully before the offset is queried.
  ASSERT_TRUE(tables->LoadHintStream(hint_stream.Get()));

  // 127546 is the known real value taken from the original file.
  EXPECT_EQ(127546, tables->GetFirstPageObjOffset());
}
183