1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include <string.h>

#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "core/fxcrt/bytestring.h"
#include "core/fxcrt/widestring.h"
#include "public/fpdfview.h"
#include "testing/embedder_test.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/range_set.h"
#include "testing/utils/file_util.h"
#include "testing/utils/path_service.h"
19
20 namespace {
21
22 class MockDownloadHints final : public FX_DOWNLOADHINTS {
23 public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)24 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
25 }
26
MockDownloadHints()27 MockDownloadHints() {
28 FX_DOWNLOADHINTS::version = 1;
29 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
30 }
31
32 ~MockDownloadHints() = default;
33 };
34
35 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
36 public:
TestAsyncLoader(const std::string & file_name)37 explicit TestAsyncLoader(const std::string& file_name) {
38 std::string file_path;
39 if (!PathService::GetTestFilePath(file_name, &file_path))
40 return;
41 file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
42 if (!file_contents_)
43 return;
44
45 file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
46 file_access_.m_GetBlock = SGetBlock;
47 file_access_.m_Param = this;
48
49 FX_DOWNLOADHINTS::version = 1;
50 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
51
52 FX_FILEAVAIL::version = 1;
53 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
54 }
55
IsOpened() const56 bool IsOpened() const { return !!file_contents_; }
57
file_access()58 FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()59 FX_DOWNLOADHINTS* hints() { return this; }
file_avail()60 FX_FILEAVAIL* file_avail() { return this; }
61
requested_segments() const62 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
63 return requested_segments_;
64 }
65
max_requested_bound() const66 size_t max_requested_bound() const { return max_requested_bound_; }
67
ClearRequestedSegments()68 void ClearRequestedSegments() {
69 requested_segments_.clear();
70 max_requested_bound_ = 0;
71 }
72
is_new_data_available() const73 bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)74 void set_is_new_data_available(bool is_new_data_available) {
75 is_new_data_available_ = is_new_data_available;
76 }
77
max_already_available_bound() const78 size_t max_already_available_bound() const {
79 return available_ranges_.IsEmpty()
80 ? 0
81 : available_ranges_.ranges().rbegin()->second;
82 }
83
FlushRequestedData()84 void FlushRequestedData() {
85 for (const auto& it : requested_segments_) {
86 SetDataAvailable(it.first, it.second);
87 }
88 ClearRequestedSegments();
89 }
90
file_contents()91 char* file_contents() { return file_contents_.get(); }
file_length() const92 size_t file_length() const { return file_length_; }
93
94 private:
SetDataAvailable(size_t start,size_t size)95 void SetDataAvailable(size_t start, size_t size) {
96 available_ranges_.Union(RangeSet::Range(start, start + size));
97 }
98
CheckDataAlreadyAvailable(size_t start,size_t size) const99 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
100 return available_ranges_.Contains(RangeSet::Range(start, start + size));
101 }
102
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)103 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
104 if (!IsDataAvailImpl(pos, size))
105 return 0;
106 const unsigned long end =
107 std::min(static_cast<unsigned long>(file_length_), pos + size);
108 if (end <= pos)
109 return 0;
110 memcpy(pBuf, file_contents_.get() + pos, end - pos);
111 SetDataAvailable(pos, end - pos);
112 return static_cast<int>(end - pos);
113 }
114
AddSegmentImpl(size_t offset,size_t size)115 void AddSegmentImpl(size_t offset, size_t size) {
116 requested_segments_.push_back(std::make_pair(offset, size));
117 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
118 }
119
IsDataAvailImpl(size_t offset,size_t size)120 bool IsDataAvailImpl(size_t offset, size_t size) {
121 if (offset + size > file_length_)
122 return false;
123 if (is_new_data_available_) {
124 SetDataAvailable(offset, size);
125 return true;
126 }
127 return CheckDataAlreadyAvailable(offset, size);
128 }
129
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)130 static int SGetBlock(void* param,
131 unsigned long pos,
132 unsigned char* pBuf,
133 unsigned long size) {
134 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
135 }
136
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)137 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
138 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
139 }
140
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)141 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
142 size_t offset,
143 size_t size) {
144 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
145 }
146
147 FPDF_FILEACCESS file_access_;
148
149 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
150 size_t file_length_ = 0;
151 std::vector<std::pair<size_t, size_t>> requested_segments_;
152 size_t max_requested_bound_ = 0;
153 bool is_new_data_available_ = true;
154
155 RangeSet available_ranges_;
156 };
157
158 } // namespace
159
160 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
161
// A file whose trailer is unterminated must be rejected gracefully.
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // Opening has to fail cleanly (no crash): the document is too malformed to
  // ever become available.
  EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));

  MockDownloadHints download_hints;
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &download_hints));
}
168
// A file whose trailer is a hex string must be rejected gracefully.
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // Opening has to fail cleanly (no crash): the document is too malformed to
  // ever become available.
  EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));

  MockDownloadHints download_hints;
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &download_hints));
}
175
// Once page 1 has been reported as available, it must be loadable without
// any further data being delivered.
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  // No new data available, to prevent loading of the "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
189
// Form availability must resolve without error when data is delivered only
// on request.
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep delivering whatever was requested until the answer is final.
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail_, loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
209
// Checking and loading the first page of a linearized file must neither read
// nor request data up to the end of the file, where the main cross reference
// table is located.
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  const int first_page_num = FPDFAvail_GetFirstPageNum(document_);
  const unsigned long file_size = loader.file_access()->m_FileLen;

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(file_size, loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  // The return value is not checked here; only the recorded requests matter.
  loader.set_is_new_data_available(false);
  FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(file_size, loader.max_requested_bound());

  // Allow the page to be parsed.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(file_size, loader.max_already_available_bound());

  // Prevent loading of data while the page is being loaded.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
248
// The second page must become available, and then loadable, when data is
// delivered only on request.
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Page indices are passed to the API as plain ints.
  static constexpr int kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep delivering whatever was requested until the answer is final.
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading of data while the page is being loaded.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
275
// Metadata that is not available after the initial document check must become
// readable once the rest of the file has been received.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());

  // Deliver requested data until the answer is final. Looping only while the
  // status is "not available" keeps an error result from spinning forever.
  int status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));

  // Simulate receiving the whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
}
297
// When "Info" points at an object in the first page section, metadata must be
// readable right after the initial document check.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop right away if the test file could not be read; the in-memory patch
  // below needs a valid buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization. Both strings have the same length, so the patch does not
  // shift any file offsets.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kPatchedInfo[] = "/Info 29 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "the patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // sizeof() - 1: overwrite the text only, not a terminating NUL.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());

  // Deliver requested data until the answer is final. Looping only while the
  // status is "not available" keeps an error result from spinning forever.
  int status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
  constexpr wchar_t kExpectedValue[] = L"Catalog";
  EXPECT_EQ(WideString(kExpectedValue),
            WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
}
325
// Reading metadata must fail gracefully when "Info" refers to an invalid
// object.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop right away if the test file could not be read; the in-memory patch
  // below needs a valid buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization. Both
  // strings have the same length, so the patch does not shift file offsets.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kPatchedInfo[] = "/Info 99 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "the patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // sizeof() - 1: overwrite the text only, not a terminating NUL.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());

  // Deliver requested data until the answer is final. Looping only while the
  // status is "not available" keeps an error result from spinning forever.
  int status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): the previous comment said "second page" but page index 0 is
  // what is passed here; confirm which page was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
351
// Reading metadata must fail gracefully when the trailer has no usable "Info"
// entry.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop right away if the test file could not be read; the in-memory patch
  // below needs a valid buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization. Both strings
  // have the same length, so the patch does not shift file offsets.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kPatchedInfo[] = "/I_fo 27 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "the patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // sizeof() - 1: overwrite the text only, not a terminating NUL.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());

  // Deliver requested data until the answer is final. Looping only while the
  // status is "not available" keeps an error result from spinning forever.
  int status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): the previous comment said "second page" but page index 0 is
  // what is passed here; confirm which page was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
377
// Every FPDFAvail_* entry point must tolerate null handles and report its
// respective failure value.
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Document-level queries.
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
  EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));

  // Page and form queries.
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));

  // Linearization query.
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr));
}
386
// A negative page index must be reported as "not available".
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop right away if the test file could not be read, rather than handing
  // an unopened loader to the library.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail_, -1, loader.hints()));
}
394