1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
#include "public/fpdf_ppo.h"

#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fxcrt/unowned_ptr.h"
#include "fpdfsdk/fsdk_define.h"
#include "third_party/base/ptr_util.h"
24
25 namespace {
26
PageDictGetInheritableTag(CPDF_Dictionary * pDict,const ByteString & bsSrcTag)27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
28 const ByteString& bsSrcTag) {
29 if (!pDict || bsSrcTag.IsEmpty())
30 return nullptr;
31 if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
32 return nullptr;
33
34 CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
35 if (!ToName(pType))
36 return nullptr;
37 if (pType->GetString().Compare("Page"))
38 return nullptr;
39
40 CPDF_Dictionary* pp =
41 ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
42 if (!pp)
43 return nullptr;
44
45 if (pDict->KeyExist(bsSrcTag))
46 return pDict->GetObjectFor(bsSrcTag);
47
48 while (pp) {
49 if (pp->KeyExist(bsSrcTag))
50 return pp->GetObjectFor(bsSrcTag);
51 if (!pp->KeyExist("Parent"))
52 break;
53 pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
54 }
55 return nullptr;
56 }
57
CopyInheritable(CPDF_Dictionary * pCurPageDict,CPDF_Dictionary * pSrcPageDict,const ByteString & key)58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
59 CPDF_Dictionary* pSrcPageDict,
60 const ByteString& key) {
61 if (pCurPageDict->KeyExist(key))
62 return true;
63
64 CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
65 if (!pInheritable)
66 return false;
67
68 pCurPageDict->SetFor(key, pInheritable->Clone());
69 return true;
70 }
71
ParserPageRangeString(ByteString rangstring,std::vector<uint16_t> * pageArray,int nCount)72 bool ParserPageRangeString(ByteString rangstring,
73 std::vector<uint16_t>* pageArray,
74 int nCount) {
75 if (rangstring.IsEmpty())
76 return true;
77
78 rangstring.Remove(' ');
79 size_t nLength = rangstring.GetLength();
80 ByteString cbCompareString("0123456789-,");
81 for (size_t i = 0; i < nLength; ++i) {
82 if (!cbCompareString.Contains(rangstring[i]))
83 return false;
84 }
85
86 ByteString cbMidRange;
87 size_t nStringFrom = 0;
88 Optional<size_t> nStringTo = 0;
89 while (nStringTo < nLength) {
90 nStringTo = rangstring.Find(',', nStringFrom);
91 if (!nStringTo.has_value())
92 nStringTo = nLength;
93 cbMidRange = rangstring.Mid(nStringFrom, nStringTo.value() - nStringFrom);
94 auto nMid = cbMidRange.Find('-');
95 if (!nMid.has_value()) {
96 uint16_t pageNum =
97 pdfium::base::checked_cast<uint16_t>(atoi(cbMidRange.c_str()));
98 if (pageNum <= 0 || pageNum > nCount)
99 return false;
100 pageArray->push_back(pageNum);
101 } else {
102 uint16_t nStartPageNum = pdfium::base::checked_cast<uint16_t>(
103 atoi(cbMidRange.Left(nMid.value()).c_str()));
104 if (nStartPageNum == 0)
105 return false;
106
107 nMid = nMid.value() + 1;
108 size_t nEnd = cbMidRange.GetLength() - nMid.value();
109 if (nEnd == 0)
110 return false;
111
112 uint16_t nEndPageNum = pdfium::base::checked_cast<uint16_t>(
113 atoi(cbMidRange.Mid(nMid.value(), nEnd).c_str()));
114 if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
115 nEndPageNum > nCount) {
116 return false;
117 }
118 for (uint16_t i = nStartPageNum; i <= nEndPageNum; ++i) {
119 pageArray->push_back(i);
120 }
121 }
122 nStringFrom = nStringTo.value() + 1;
123 }
124 return true;
125 }
126
127 } // namespace
128
129 class CPDF_PageOrganizer {
130 public:
131 CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
132 ~CPDF_PageOrganizer();
133
134 bool PDFDocInit();
135 bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
136
137 private:
138 using ObjectNumberMap = std::map<uint32_t, uint32_t>;
139
140 bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
141 uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
142
143 UnownedPtr<CPDF_Document> m_pDestPDFDoc;
144 UnownedPtr<CPDF_Document> m_pSrcPDFDoc;
145 };
146
CPDF_PageOrganizer(CPDF_Document * pDestPDFDoc,CPDF_Document * pSrcPDFDoc)147 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
148 CPDF_Document* pSrcPDFDoc)
149 : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
150
~CPDF_PageOrganizer()151 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
152
PDFDocInit()153 bool CPDF_PageOrganizer::PDFDocInit() {
154 ASSERT(m_pDestPDFDoc);
155 ASSERT(m_pSrcPDFDoc);
156
157 CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
158 if (!pNewRoot)
159 return false;
160
161 CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
162 if (!pDocInfoDict)
163 return false;
164
165 pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
166
167 ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
168 if (cbRootType.IsEmpty())
169 pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
170
171 CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
172 CPDF_Dictionary* pNewPages =
173 pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
174 if (!pNewPages) {
175 pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
176 pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc.Get(),
177 pNewPages->GetObjNum());
178 }
179
180 ByteString cbPageType = pNewPages->GetStringFor("Type", "");
181 if (cbPageType.IsEmpty())
182 pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
183
184 if (!pNewPages->GetArrayFor("Kids")) {
185 pNewPages->SetNewFor<CPDF_Number>("Count", 0);
186 pNewPages->SetNewFor<CPDF_Reference>(
187 "Kids", m_pDestPDFDoc.Get(),
188 m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
189 }
190
191 return true;
192 }
193
ExportPage(const std::vector<uint16_t> & pageNums,int nIndex)194 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
195 int nIndex) {
196 int curpage = nIndex;
197 auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
198 for (size_t i = 0; i < pageNums.size(); ++i) {
199 CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
200 CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
201 if (!pSrcPageDict || !pCurPageDict)
202 return false;
203
204 // Clone the page dictionary
205 for (const auto& it : *pSrcPageDict) {
206 const ByteString& cbSrcKeyStr = it.first;
207 if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
208 continue;
209
210 CPDF_Object* pObj = it.second.get();
211 pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
212 }
213
214 // inheritable item
215 // Even though some entries are required by the PDF spec, there exist
216 // PDFs that omit them. Set some defaults in this case.
217 // 1 MediaBox - required
218 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
219 // Search for "CropBox" in the source page dictionary.
220 // If it does not exist, use the default letter size.
221 CPDF_Object* pInheritable =
222 PageDictGetInheritableTag(pSrcPageDict, "CropBox");
223 if (pInheritable) {
224 pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
225 } else {
226 // Make the default size letter size (8.5"x11")
227 CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
228 pArray->AddNew<CPDF_Number>(0);
229 pArray->AddNew<CPDF_Number>(0);
230 pArray->AddNew<CPDF_Number>(612);
231 pArray->AddNew<CPDF_Number>(792);
232 }
233 }
234
235 // 2 Resources - required
236 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources")) {
237 // Use a default empty resources if it does not exist.
238 pCurPageDict->SetNewFor<CPDF_Dictionary>("Resources");
239 }
240
241 // 3 CropBox - optional
242 CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
243 // 4 Rotate - optional
244 CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
245
246 // Update the reference
247 uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
248 uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
249 (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
250 UpdateReference(pCurPageDict, pObjNumberMap.get());
251 ++curpage;
252 }
253
254 return true;
255 }
256
UpdateReference(CPDF_Object * pObj,ObjectNumberMap * pObjNumberMap)257 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
258 ObjectNumberMap* pObjNumberMap) {
259 switch (pObj->GetType()) {
260 case CPDF_Object::REFERENCE: {
261 CPDF_Reference* pReference = pObj->AsReference();
262 uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
263 if (newobjnum == 0)
264 return false;
265 pReference->SetRef(m_pDestPDFDoc.Get(), newobjnum);
266 break;
267 }
268 case CPDF_Object::DICTIONARY: {
269 CPDF_Dictionary* pDict = pObj->AsDictionary();
270 auto it = pDict->begin();
271 while (it != pDict->end()) {
272 const ByteString& key = it->first;
273 CPDF_Object* pNextObj = it->second.get();
274 ++it;
275 if (key == "Parent" || key == "Prev" || key == "First")
276 continue;
277 if (!pNextObj)
278 return false;
279 if (!UpdateReference(pNextObj, pObjNumberMap))
280 pDict->RemoveFor(key);
281 }
282 break;
283 }
284 case CPDF_Object::ARRAY: {
285 CPDF_Array* pArray = pObj->AsArray();
286 for (size_t i = 0; i < pArray->GetCount(); ++i) {
287 CPDF_Object* pNextObj = pArray->GetObjectAt(i);
288 if (!pNextObj)
289 return false;
290 if (!UpdateReference(pNextObj, pObjNumberMap))
291 return false;
292 }
293 break;
294 }
295 case CPDF_Object::STREAM: {
296 CPDF_Stream* pStream = pObj->AsStream();
297 CPDF_Dictionary* pDict = pStream->GetDict();
298 if (!pDict)
299 return false;
300 if (!UpdateReference(pDict, pObjNumberMap))
301 return false;
302 break;
303 }
304 default:
305 break;
306 }
307
308 return true;
309 }
310
GetNewObjId(ObjectNumberMap * pObjNumberMap,CPDF_Reference * pRef)311 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
312 CPDF_Reference* pRef) {
313 if (!pRef)
314 return 0;
315
316 uint32_t dwObjnum = pRef->GetRefObjNum();
317 uint32_t dwNewObjNum = 0;
318 const auto it = pObjNumberMap->find(dwObjnum);
319 if (it != pObjNumberMap->end())
320 dwNewObjNum = it->second;
321 if (dwNewObjNum)
322 return dwNewObjNum;
323
324 CPDF_Object* pDirect = pRef->GetDirect();
325 if (!pDirect)
326 return 0;
327
328 std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
329 if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
330 if (pDictClone->KeyExist("Type")) {
331 ByteString strType = pDictClone->GetStringFor("Type");
332 if (!FXSYS_stricmp(strType.c_str(), "Pages"))
333 return 4;
334 if (!FXSYS_stricmp(strType.c_str(), "Page"))
335 return 0;
336 }
337 }
338 CPDF_Object* pUnownedClone =
339 m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
340 dwNewObjNum = pUnownedClone->GetObjNum();
341 (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
342 if (!UpdateReference(pUnownedClone, pObjNumberMap))
343 return 0;
344
345 return dwNewObjNum;
346 }
347
FPDF_ImportPages(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc,FPDF_BYTESTRING pagerange,int index)348 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
349 FPDF_DOCUMENT src_doc,
350 FPDF_BYTESTRING pagerange,
351 int index) {
352 CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
353 if (!dest_doc)
354 return false;
355
356 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
357 if (!pSrcDoc)
358 return false;
359
360 std::vector<uint16_t> pageArray;
361 int nCount = pSrcDoc->GetPageCount();
362 if (pagerange) {
363 if (!ParserPageRangeString(pagerange, &pageArray, nCount))
364 return false;
365 } else {
366 for (int i = 1; i <= nCount; ++i) {
367 pageArray.push_back(i);
368 }
369 }
370
371 CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
372 return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
373 }
374
375 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc)376 FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc, FPDF_DOCUMENT src_doc) {
377 CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
378 if (!pDstDoc)
379 return false;
380
381 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
382 if (!pSrcDoc)
383 return false;
384
385 CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
386 pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
387 if (!pSrcDict)
388 return false;
389
390 CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
391 if (!pDstDict)
392 return false;
393
394 pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
395 return true;
396 }
397