• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <memory>
7 #include <string>
8 #include <utility>
9 #include <vector>
10 
11 #include "core/fxcrt/bytestring.h"
12 #include "core/fxcrt/widestring.h"
13 #include "public/fpdfview.h"
14 #include "testing/embedder_test.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "testing/range_set.h"
17 #include "testing/utils/file_util.h"
18 #include "testing/utils/path_service.h"
19 
20 namespace {
21 
22 class MockDownloadHints final : public FX_DOWNLOADHINTS {
23  public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)24   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
25   }
26 
MockDownloadHints()27   MockDownloadHints() {
28     FX_DOWNLOADHINTS::version = 1;
29     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
30   }
31 
32   ~MockDownloadHints() = default;
33 };
34 
35 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
36  public:
TestAsyncLoader(const std::string & file_name)37   explicit TestAsyncLoader(const std::string& file_name) {
38     std::string file_path;
39     if (!PathService::GetTestFilePath(file_name, &file_path))
40       return;
41     file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
42     if (!file_contents_)
43       return;
44 
45     file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
46     file_access_.m_GetBlock = SGetBlock;
47     file_access_.m_Param = this;
48 
49     FX_DOWNLOADHINTS::version = 1;
50     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
51 
52     FX_FILEAVAIL::version = 1;
53     FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
54   }
55 
IsOpened() const56   bool IsOpened() const { return !!file_contents_; }
57 
file_access()58   FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()59   FX_DOWNLOADHINTS* hints() { return this; }
file_avail()60   FX_FILEAVAIL* file_avail() { return this; }
61 
requested_segments() const62   const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
63     return requested_segments_;
64   }
65 
max_requested_bound() const66   size_t max_requested_bound() const { return max_requested_bound_; }
67 
ClearRequestedSegments()68   void ClearRequestedSegments() {
69     requested_segments_.clear();
70     max_requested_bound_ = 0;
71   }
72 
is_new_data_available() const73   bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)74   void set_is_new_data_available(bool is_new_data_available) {
75     is_new_data_available_ = is_new_data_available;
76   }
77 
max_already_available_bound() const78   size_t max_already_available_bound() const {
79     return available_ranges_.IsEmpty()
80                ? 0
81                : available_ranges_.ranges().rbegin()->second;
82   }
83 
FlushRequestedData()84   void FlushRequestedData() {
85     for (const auto& it : requested_segments_) {
86       SetDataAvailable(it.first, it.second);
87     }
88     ClearRequestedSegments();
89   }
90 
file_contents()91   char* file_contents() { return file_contents_.get(); }
file_length() const92   size_t file_length() const { return file_length_; }
93 
94  private:
SetDataAvailable(size_t start,size_t size)95   void SetDataAvailable(size_t start, size_t size) {
96     available_ranges_.Union(RangeSet::Range(start, start + size));
97   }
98 
CheckDataAlreadyAvailable(size_t start,size_t size) const99   bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
100     return available_ranges_.Contains(RangeSet::Range(start, start + size));
101   }
102 
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)103   int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
104     if (!IsDataAvailImpl(pos, size))
105       return 0;
106     const unsigned long end =
107         std::min(static_cast<unsigned long>(file_length_), pos + size);
108     if (end <= pos)
109       return 0;
110     memcpy(pBuf, file_contents_.get() + pos, end - pos);
111     SetDataAvailable(pos, end - pos);
112     return static_cast<int>(end - pos);
113   }
114 
AddSegmentImpl(size_t offset,size_t size)115   void AddSegmentImpl(size_t offset, size_t size) {
116     requested_segments_.push_back(std::make_pair(offset, size));
117     max_requested_bound_ = std::max(max_requested_bound_, offset + size);
118   }
119 
IsDataAvailImpl(size_t offset,size_t size)120   bool IsDataAvailImpl(size_t offset, size_t size) {
121     if (offset + size > file_length_)
122       return false;
123     if (is_new_data_available_) {
124       SetDataAvailable(offset, size);
125       return true;
126     }
127     return CheckDataAlreadyAvailable(offset, size);
128   }
129 
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)130   static int SGetBlock(void* param,
131                        unsigned long pos,
132                        unsigned char* pBuf,
133                        unsigned long size) {
134     return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
135   }
136 
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)137   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
138     return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
139   }
140 
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)141   static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
142                                 size_t offset,
143                                 size_t size) {
144     return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
145   }
146 
147   FPDF_FILEACCESS file_access_;
148 
149   std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
150   size_t file_length_ = 0;
151   std::vector<std::pair<size_t, size_t>> requested_segments_;
152   size_t max_requested_bound_ = 0;
153   bool is_new_data_available_ = true;
154 
155   RangeSet available_ranges_;
156 };
157 
158 }  // namespace
159 
160 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
161 
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  MockDownloadHints download_hints;

  // Opening the malformed file is expected to fail (without crashing), and
  // the document must not be reported as available afterwards.
  EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &download_hints));
}
168 
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  MockDownloadHints download_hints;

  // Opening the malformed file is expected to fail (without crashing), and
  // the document must not be reported as available afterwards.
  EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &download_hints));
}
175 
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  // No new data available, to prevent load "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
189 
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep feeding the parser whatever it asked for until it stops reporting
  // that the form data is not available.
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail_, loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
209 
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  const int first_page_num = FPDFAvail_GetFirstPageNum(document_);

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  // The return value is not checked here; only the requests recorded by the
  // loader matter for the expectation that follows.
  loader.set_is_new_data_available(false);
  FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow parse page.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
248 
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Declared as int to match the page index parameters of the APIs it is
  // passed to below.
  static constexpr int kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep feeding the parser whatever it asked for until it stops reporting
  // that the page data is not available.
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
275 
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Only hand out data the parser has explicitly requested.
  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
    loader.FlushRequestedData();
  }

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));

  // Simulate receiving whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
}
297 
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail with a clear message, instead of patching and parsing an empty
  // buffer, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kRemappedInfo[] = "/Info 29 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kRemappedInfo),
                "The in-place patch must not change the file size.");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the entry in place; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kRemappedInfo,
         sizeof(kRemappedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
    loader.FlushRequestedData();
  }

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
  constexpr wchar_t kExpectedValue[] = L"Catalog";
  EXPECT_EQ(WideString(kExpectedValue),
            WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
}
325 
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail with a clear message, instead of patching and parsing an empty
  // buffer, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kInvalidInfo[] = "/Info 99 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kInvalidInfo),
                "The in-place patch must not change the file size.");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the entry in place; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kInvalidInfo,
         sizeof(kInvalidInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
    loader.FlushRequestedData();
  }

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page (index 0), to load additional crossrefs.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
351 
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail with a clear message, instead of patching and parsing an empty
  // buffer, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kBrokenInfo[] = "/I_fo 27 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kBrokenInfo),
                "The in-place patch must not change the file size.");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the entry in place; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kBrokenInfo,
         sizeof(kBrokenInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) {
    loader.FlushRequestedData();
  }

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page (index 0), to load additional crossrefs.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
377 
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Every FPDFAvail_* entry point is called with null handles and must report
  // its failure value rather than crash.
  FPDF_AVAIL null_avail = nullptr;
  FPDF_DOCUMENT null_document = nullptr;
  FX_DOWNLOADHINTS* null_hints = nullptr;

  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(null_avail, null_hints));
  EXPECT_FALSE(FPDFAvail_GetDocument(null_avail, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(null_document));
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(null_avail, 0, null_hints));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(null_avail, null_hints));
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(null_avail));
}
386 
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail with a clear message, instead of driving the library with an
  // unopened loader, if the test file could not be read.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));

  // A negative page index must be reported as "not available".
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail_, -1, loader.hints()));
}
394