1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <memory>
7 #include <string>
8 #include <utility>
9 #include <vector>
10
11 #include "core/fxcrt/bytestring.h"
12 #include "core/fxcrt/compiler_specific.h"
13 #include "core/fxcrt/numerics/safe_conversions.h"
14 #include "core/fxcrt/stl_util.h"
15 #include "public/fpdf_doc.h"
16 #include "public/fpdfview.h"
17 #include "testing/embedder_test.h"
18 #include "testing/fx_string_testhelpers.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20 #include "testing/range_set.h"
21 #include "testing/utils/file_util.h"
22 #include "testing/utils/path_service.h"
23
24 namespace {
25
26 class MockDownloadHints final : public FX_DOWNLOADHINTS {
27 public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)28 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
29 }
30
MockDownloadHints()31 MockDownloadHints() {
32 FX_DOWNLOADHINTS::version = 1;
33 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
34 }
35
36 ~MockDownloadHints() = default;
37 };
38
39 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
40 public:
TestAsyncLoader(const std::string & file_name)41 explicit TestAsyncLoader(const std::string& file_name) {
42 std::string file_path = PathService::GetTestFilePath(file_name);
43 if (file_path.empty()) {
44 return;
45 }
46 file_contents_ = GetFileContents(file_path.c_str());
47 if (file_contents_.empty()) {
48 return;
49 }
50
51 file_access_.m_FileLen =
52 pdfium::checked_cast<unsigned long>(file_contents_.size());
53 file_access_.m_GetBlock = SGetBlock;
54 file_access_.m_Param = this;
55
56 FX_DOWNLOADHINTS::version = 1;
57 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
58
59 FX_FILEAVAIL::version = 1;
60 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
61 }
62
IsOpened() const63 bool IsOpened() const { return !file_contents_.empty(); }
64
file_access()65 FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()66 FX_DOWNLOADHINTS* hints() { return this; }
file_avail()67 FX_FILEAVAIL* file_avail() { return this; }
68
requested_segments() const69 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
70 return requested_segments_;
71 }
72
max_requested_bound() const73 size_t max_requested_bound() const { return max_requested_bound_; }
74
ClearRequestedSegments()75 void ClearRequestedSegments() {
76 requested_segments_.clear();
77 max_requested_bound_ = 0;
78 }
79
is_new_data_available() const80 bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)81 void set_is_new_data_available(bool is_new_data_available) {
82 is_new_data_available_ = is_new_data_available;
83 }
84
max_already_available_bound() const85 size_t max_already_available_bound() const {
86 return available_ranges_.IsEmpty()
87 ? 0
88 : available_ranges_.ranges().rbegin()->second;
89 }
90
FlushRequestedData()91 void FlushRequestedData() {
92 for (const auto& it : requested_segments_) {
93 SetDataAvailable(it.first, it.second);
94 }
95 ClearRequestedSegments();
96 }
97
file_contents() const98 pdfium::span<const uint8_t> file_contents() const { return file_contents_; }
mutable_file_contents()99 pdfium::span<uint8_t> mutable_file_contents() { return file_contents_; }
100
101 private:
SetDataAvailable(size_t start,size_t size)102 void SetDataAvailable(size_t start, size_t size) {
103 available_ranges_.Union(RangeSet::Range(start, start + size));
104 }
105
CheckDataAlreadyAvailable(size_t start,size_t size) const106 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
107 return available_ranges_.Contains(RangeSet::Range(start, start + size));
108 }
109
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)110 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
111 if (!IsDataAvailImpl(pos, size))
112 return 0;
113 const unsigned long end = std::min(
114 pdfium::checked_cast<unsigned long>(file_contents_.size()), pos + size);
115 if (end <= pos)
116 return 0;
117 const unsigned long bytes_to_copy = end - pos;
118 fxcrt::Copy(file_contents().subspan(pos, bytes_to_copy),
119 UNSAFE_TODO(pdfium::make_span(pBuf, size)));
120 SetDataAvailable(pos, bytes_to_copy);
121 return static_cast<int>(bytes_to_copy);
122 }
123
AddSegmentImpl(size_t offset,size_t size)124 void AddSegmentImpl(size_t offset, size_t size) {
125 requested_segments_.emplace_back(offset, size);
126 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
127 }
128
IsDataAvailImpl(size_t offset,size_t size)129 bool IsDataAvailImpl(size_t offset, size_t size) {
130 if (offset + size > file_contents_.size()) {
131 return false;
132 }
133 if (is_new_data_available_) {
134 SetDataAvailable(offset, size);
135 return true;
136 }
137 return CheckDataAlreadyAvailable(offset, size);
138 }
139
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)140 static int SGetBlock(void* param,
141 unsigned long pos,
142 unsigned char* pBuf,
143 unsigned long size) {
144 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
145 }
146
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)147 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
148 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
149 }
150
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)151 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
152 size_t offset,
153 size_t size) {
154 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
155 }
156
157 FPDF_FILEACCESS file_access_;
158
159 std::vector<uint8_t> file_contents_;
160 std::vector<std::pair<size_t, size_t>> requested_segments_;
161 size_t max_requested_bound_ = 0;
162 bool is_new_data_available_ = true;
163
164 RangeSet available_ranges_;
165 };
166
167 } // namespace
168
169 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
170
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // Opening the malformed file must not crash. The open is expected to fail
  // and the availability query is expected to yield a false (zero) value.
  const bool opened = OpenDocument("trailer_unterminated.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints download_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &download_hints);
  EXPECT_FALSE(doc_status);
}
177
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // Opening the malformed file must not crash. The open is expected to fail
  // and the availability query is expected to yield a false (zero) value.
  const bool opened = OpenDocument("trailer_as_hexstring.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints download_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &download_hints);
  EXPECT_FALSE(doc_status);
}
184
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  // No new data available, to prevent load "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
198
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Deliver whatever was requested and ask again until the form check stops
  // reporting "not available".
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail(), loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
218
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  const int first_page_num = FPDFAvail_GetFirstPageNum(document());

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  // The result of this query is deliberately ignored; only the segments it
  // requests are inspected below.
  loader.set_is_new_data_available(false);
  FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow parse page.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
257
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  static constexpr uint32_t kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Deliver whatever was requested and ask again until the page check stops
  // reporting "not available".
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
284
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the document is reported as "not
  // available". Any other non-success status must fail the test instead of
  // spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));

  // Simulate receiving whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
}
306
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offset 7 within "/Info 27 0 R" is the '7' of the object number; patching
  // it to '9' turns the reference into "/Info 29 0 R".
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(7);
  ASSERT_FALSE(span.empty());
  EXPECT_EQ('7', span[0]);
  span[0] = '9';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the document is reported as "not
  // available". Any other non-success status must fail the test instead of
  // spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer)));
  EXPECT_EQ(L"Catalog", GetPlatformWString(buffer));
}
335
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offsets 6 and 7 within "/Info 27 0 R" hold the object number "27";
  // patching both digits turns the reference into "/Info 99 0 R".
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(6);
  ASSERT_GE(span.size(), 2u);
  EXPECT_EQ('2', span[0]);
  EXPECT_EQ('7', span[1]);
  span[0] = '9';
  span[1] = '9';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the document is reported as "not
  // available". Any other non-success status must fail the test instead of
  // spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check availability of page index 0, to load additional crossrefs.
  // NOTE(review): the earlier wording said "second page", but index 0 is what
  // is queried here - confirm which page was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
366
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offset 2 within "/Info 27 0 R" is the 'n' of the key; patching it turns
  // the key into "/I_fo".
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(2);
  ASSERT_FALSE(span.empty());
  EXPECT_EQ('n', span[0]);
  span[0] = '_';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the document is reported as "not
  // available". Any other non-success status must fail the test instead of
  // spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check availability of page index 0, to load additional crossrefs.
  // NOTE(review): the earlier wording said "second page", but index 0 is what
  // is queried here - confirm which page was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
395
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Every availability entry point is invoked with null handles and must
  // hand back the error / unknown value checked below.
  const int doc_status = FPDFAvail_IsDocAvail(nullptr, nullptr);
  EXPECT_EQ(PDF_DATA_ERROR, doc_status);

  FPDF_DOCUMENT doc = FPDFAvail_GetDocument(nullptr, nullptr);
  EXPECT_FALSE(doc);

  const int first_page = FPDFAvail_GetFirstPageNum(nullptr);
  EXPECT_EQ(0, first_page);

  const int page_status = FPDFAvail_IsPageAvail(nullptr, 0, nullptr);
  EXPECT_EQ(PDF_DATA_ERROR, page_status);

  const int form_status = FPDFAvail_IsFormAvail(nullptr, nullptr);
  EXPECT_EQ(PDF_FORM_ERROR, form_status);

  const int linearized = FPDFAvail_IsLinearized(nullptr);
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, linearized);
}
404
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture could not be read, rather than handing an
  // unusable loader to the availability provider.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));

  // A negative page index is expected to be answered with "not available".
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail(), -1, loader.hints()));
}
412
TEST_F(FPDFDataAvailEmbedderTest, Bug1324189) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324189.pdf");
  // Make sure the regression input was actually read; otherwise the check
  // below would not exercise the file at all.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
419
TEST_F(FPDFDataAvailEmbedderTest, Bug1324503) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324503.pdf");
  // Make sure the regression input was actually read; otherwise the check
  // below would not exercise the file at all.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
426