• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <memory>
7 #include <string>
8 #include <utility>
9 #include <vector>
10 
11 #include "core/fxcrt/bytestring.h"
12 #include "core/fxcrt/compiler_specific.h"
13 #include "core/fxcrt/numerics/safe_conversions.h"
14 #include "core/fxcrt/stl_util.h"
15 #include "public/fpdf_doc.h"
16 #include "public/fpdfview.h"
17 #include "testing/embedder_test.h"
18 #include "testing/fx_string_testhelpers.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20 #include "testing/range_set.h"
21 #include "testing/utils/file_util.h"
22 #include "testing/utils/path_service.h"
23 
24 namespace {
25 
26 class MockDownloadHints final : public FX_DOWNLOADHINTS {
27  public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)28   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
29   }
30 
MockDownloadHints()31   MockDownloadHints() {
32     FX_DOWNLOADHINTS::version = 1;
33     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
34   }
35 
36   ~MockDownloadHints() = default;
37 };
38 
39 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
40  public:
TestAsyncLoader(const std::string & file_name)41   explicit TestAsyncLoader(const std::string& file_name) {
42     std::string file_path = PathService::GetTestFilePath(file_name);
43     if (file_path.empty()) {
44       return;
45     }
46     file_contents_ = GetFileContents(file_path.c_str());
47     if (file_contents_.empty()) {
48       return;
49     }
50 
51     file_access_.m_FileLen =
52         pdfium::checked_cast<unsigned long>(file_contents_.size());
53     file_access_.m_GetBlock = SGetBlock;
54     file_access_.m_Param = this;
55 
56     FX_DOWNLOADHINTS::version = 1;
57     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
58 
59     FX_FILEAVAIL::version = 1;
60     FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
61   }
62 
IsOpened() const63   bool IsOpened() const { return !file_contents_.empty(); }
64 
file_access()65   FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()66   FX_DOWNLOADHINTS* hints() { return this; }
file_avail()67   FX_FILEAVAIL* file_avail() { return this; }
68 
requested_segments() const69   const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
70     return requested_segments_;
71   }
72 
max_requested_bound() const73   size_t max_requested_bound() const { return max_requested_bound_; }
74 
ClearRequestedSegments()75   void ClearRequestedSegments() {
76     requested_segments_.clear();
77     max_requested_bound_ = 0;
78   }
79 
is_new_data_available() const80   bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)81   void set_is_new_data_available(bool is_new_data_available) {
82     is_new_data_available_ = is_new_data_available;
83   }
84 
max_already_available_bound() const85   size_t max_already_available_bound() const {
86     return available_ranges_.IsEmpty()
87                ? 0
88                : available_ranges_.ranges().rbegin()->second;
89   }
90 
FlushRequestedData()91   void FlushRequestedData() {
92     for (const auto& it : requested_segments_) {
93       SetDataAvailable(it.first, it.second);
94     }
95     ClearRequestedSegments();
96   }
97 
file_contents() const98   pdfium::span<const uint8_t> file_contents() const { return file_contents_; }
mutable_file_contents()99   pdfium::span<uint8_t> mutable_file_contents() { return file_contents_; }
100 
101  private:
SetDataAvailable(size_t start,size_t size)102   void SetDataAvailable(size_t start, size_t size) {
103     available_ranges_.Union(RangeSet::Range(start, start + size));
104   }
105 
CheckDataAlreadyAvailable(size_t start,size_t size) const106   bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
107     return available_ranges_.Contains(RangeSet::Range(start, start + size));
108   }
109 
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)110   int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
111     if (!IsDataAvailImpl(pos, size))
112       return 0;
113     const unsigned long end = std::min(
114         pdfium::checked_cast<unsigned long>(file_contents_.size()), pos + size);
115     if (end <= pos)
116       return 0;
117     const unsigned long bytes_to_copy = end - pos;
118     fxcrt::Copy(file_contents().subspan(pos, bytes_to_copy),
119                 UNSAFE_TODO(pdfium::make_span(pBuf, size)));
120     SetDataAvailable(pos, bytes_to_copy);
121     return static_cast<int>(bytes_to_copy);
122   }
123 
AddSegmentImpl(size_t offset,size_t size)124   void AddSegmentImpl(size_t offset, size_t size) {
125     requested_segments_.emplace_back(offset, size);
126     max_requested_bound_ = std::max(max_requested_bound_, offset + size);
127   }
128 
IsDataAvailImpl(size_t offset,size_t size)129   bool IsDataAvailImpl(size_t offset, size_t size) {
130     if (offset + size > file_contents_.size()) {
131       return false;
132     }
133     if (is_new_data_available_) {
134       SetDataAvailable(offset, size);
135       return true;
136     }
137     return CheckDataAlreadyAvailable(offset, size);
138   }
139 
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)140   static int SGetBlock(void* param,
141                        unsigned long pos,
142                        unsigned char* pBuf,
143                        unsigned long size) {
144     return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
145   }
146 
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)147   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
148     return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
149   }
150 
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)151   static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
152                                 size_t offset,
153                                 size_t size) {
154     return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
155   }
156 
157   FPDF_FILEACCESS file_access_;
158 
159   std::vector<uint8_t> file_contents_;
160   std::vector<std::pair<size_t, size_t>> requested_segments_;
161   size_t max_requested_bound_ = 0;
162   bool is_new_data_available_ = true;
163 
164   RangeSet available_ranges_;
165 };
166 
167 }  // namespace
168 
169 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
170 
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // The file is too malformed to open or to ever become available; the point
  // of the test is that neither call crashes.
  const bool opened = OpenDocument("trailer_unterminated.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints mock_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &mock_hints);
  EXPECT_FALSE(doc_status);
}
177 
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // The file is too malformed to open or to ever become available; the point
  // of the test is that neither call crashes.
  const bool opened = OpenDocument("trailer_as_hexstring.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints mock_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &mock_hints);
  EXPECT_FALSE(doc_status);
}
184 
// Verifies that a page of a linearized document can be loaded without any
// further data once FPDFAvail_IsPageAvail() has reported it available.
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  // No new data available, to prevent load "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
198 
// Verifies that form availability for a linearized document resolves to a
// non-error status when the loader supplies only the data that was requested.
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Hand over whatever was requested, then ask again, until the status is no
  // longer "not available".
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail(), loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
218 
// Verifies that checking and loading the first page of a linearized document
// never touches or requests data up to the end of the file, where the main
// cross reference table lives.
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  const int first_page_num = FPDFAvail_GetFirstPageNum(document());

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  // The result is deliberately ignored; only the requests it triggers matter.
  FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow parse page.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
257 
// Verifies that the second page of a linearized document becomes available
// when the loader supplies only the requested data, and that it then loads
// without any further data.
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  static constexpr uint32_t kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Hand over whatever was requested, then ask again, until the status is no
  // longer "not available".
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
284 
// Verifies that the "CreationDate" metadata is unavailable right after the
// document becomes available, and becomes readable once the whole file is
// available and a page check has run.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Supply requested data only while the status is "not available". Looping
  // on "!= PDF_DATA_AVAIL" would spin forever if an error were reported.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));

  // Simulate receiving whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
}
306 
// Verifies that metadata can be read as soon as the document is available
// when "Info" points at an object located in the first-page section.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  // Map "Info" to an object within the first section without breaking
  // linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offset 7 is the second digit of the object number: 27 becomes 29.
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(7);
  ASSERT_FALSE(span.empty());
  EXPECT_EQ('7', span[0]);
  span[0] = '9';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Supply requested data only while the status is "not available". Looping
  // on "!= PDF_DATA_AVAIL" would spin forever if an error were reported.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer)));
  EXPECT_EQ(L"Catalog", GetPlatformWString(buffer));
}
335 
// Verifies that reading metadata fails cleanly when "Info" refers to an
// invalid object.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  // Map "Info" to an invalid object without breaking linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offset 6 is the start of the object number: 27 becomes 99.
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(6);
  ASSERT_GE(span.size(), 2u);
  EXPECT_EQ('2', span[0]);
  EXPECT_EQ('7', span[1]);
  span[0] = '9';
  span[1] = '9';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Supply requested data only while the status is "not available". Looping
  // on "!= PDF_DATA_AVAIL" would spin forever if an error were reported.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what gets checked; confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
366 
// Verifies that reading metadata fails cleanly when the trailer has no
// usable "Info" entry.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  // Break the "Info" parameter without breaking linearization.
  ByteString data(ByteStringView(loader.file_contents()));
  std::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index.has_value());
  // Offset 2 is the 'n' of the key name: "/Info" becomes "/I_fo".
  auto span = loader.mutable_file_contents().subspan(index.value()).subspan(2);
  ASSERT_FALSE(span.empty());
  EXPECT_EQ('n', span[0]);
  span[0] = '_';

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Supply requested data only while the status is "not available". Looping
  // on "!= PDF_DATA_AVAIL" would spin forever if an error were reported.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what gets checked; confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
395 
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Each availability entry point is called with null handles and must
  // answer with the error / unknown value asserted here.
  const FPDF_AVAIL null_avail = nullptr;
  FX_DOWNLOADHINTS* const null_hints = nullptr;

  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(null_avail, null_hints));
  EXPECT_FALSE(FPDFAvail_GetDocument(null_avail, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(null_avail, 0, null_hints));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(null_avail, null_hints));
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(null_avail));
}
404 
// Verifies that a negative page index is reported as "not available".
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Nothing below is meaningful unless the test file was actually loaded.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail(), -1, loader.hints()));
}
412 
TEST_F(FPDFDataAvailEmbedderTest, Bug1324189) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324189.pdf");
  // Without the test file the check below would not exercise the regression.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
419 
TEST_F(FPDFDataAvailEmbedderTest, Bug1324503) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324503.pdf");
  // Without the test file the check below would not exercise the regression.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
426