1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_ppo.h"
8
9 #include <map>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_document.h"
16 #include "core/fpdfapi/parser/cpdf_name.h"
17 #include "core/fpdfapi/parser/cpdf_number.h"
18 #include "core/fpdfapi/parser/cpdf_reference.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_string.h"
21 #include "fpdfsdk/fsdk_define.h"
22 #include "third_party/base/ptr_util.h"
23 #include "third_party/base/stl_util.h"
24
25 namespace {
26
PageDictGetInheritableTag(CPDF_Dictionary * pDict,const CFX_ByteString & bsSrcTag)27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
28 const CFX_ByteString& bsSrcTag) {
29 if (!pDict || bsSrcTag.IsEmpty())
30 return nullptr;
31 if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
32 return nullptr;
33
34 CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
35 if (!ToName(pType))
36 return nullptr;
37 if (pType->GetString().Compare("Page"))
38 return nullptr;
39
40 CPDF_Dictionary* pp =
41 ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
42 if (!pp)
43 return nullptr;
44
45 if (pDict->KeyExist(bsSrcTag))
46 return pDict->GetObjectFor(bsSrcTag);
47
48 while (pp) {
49 if (pp->KeyExist(bsSrcTag))
50 return pp->GetObjectFor(bsSrcTag);
51 if (!pp->KeyExist("Parent"))
52 break;
53 pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
54 }
55 return nullptr;
56 }
57
CopyInheritable(CPDF_Dictionary * pCurPageDict,CPDF_Dictionary * pSrcPageDict,const CFX_ByteString & key)58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
59 CPDF_Dictionary* pSrcPageDict,
60 const CFX_ByteString& key) {
61 if (pCurPageDict->KeyExist(key))
62 return true;
63
64 CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
65 if (!pInheritable)
66 return false;
67
68 pCurPageDict->SetFor(key, pInheritable->Clone());
69 return true;
70 }
71
ParserPageRangeString(CFX_ByteString rangstring,std::vector<uint16_t> * pageArray,int nCount)72 bool ParserPageRangeString(CFX_ByteString rangstring,
73 std::vector<uint16_t>* pageArray,
74 int nCount) {
75 if (rangstring.IsEmpty())
76 return true;
77
78 rangstring.Remove(' ');
79 int nLength = rangstring.GetLength();
80 CFX_ByteString cbCompareString("0123456789-,");
81 for (int i = 0; i < nLength; ++i) {
82 if (cbCompareString.Find(rangstring[i]) == -1)
83 return false;
84 }
85
86 CFX_ByteString cbMidRange;
87 int nStringFrom = 0;
88 int nStringTo = 0;
89 while (nStringTo < nLength) {
90 nStringTo = rangstring.Find(',', nStringFrom);
91 if (nStringTo == -1)
92 nStringTo = nLength;
93 cbMidRange = rangstring.Mid(nStringFrom, nStringTo - nStringFrom);
94 int nMid = cbMidRange.Find('-');
95 if (nMid == -1) {
96 long lPageNum = atol(cbMidRange.c_str());
97 if (lPageNum <= 0 || lPageNum > nCount)
98 return false;
99 pageArray->push_back((uint16_t)lPageNum);
100 } else {
101 int nStartPageNum = atol(cbMidRange.Mid(0, nMid).c_str());
102 if (nStartPageNum == 0)
103 return false;
104
105 ++nMid;
106 int nEnd = cbMidRange.GetLength() - nMid;
107 if (nEnd == 0)
108 return false;
109
110 int nEndPageNum = atol(cbMidRange.Mid(nMid, nEnd).c_str());
111 if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
112 nEndPageNum > nCount) {
113 return false;
114 }
115 for (int i = nStartPageNum; i <= nEndPageNum; ++i) {
116 pageArray->push_back(i);
117 }
118 }
119 nStringFrom = nStringTo + 1;
120 }
121 return true;
122 }
123
124 } // namespace
125
126 class CPDF_PageOrganizer {
127 public:
128 CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
129 ~CPDF_PageOrganizer();
130
131 bool PDFDocInit();
132 bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
133
134 private:
135 using ObjectNumberMap = std::map<uint32_t, uint32_t>;
136
137 bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
138 uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
139
140 CPDF_Document* m_pDestPDFDoc;
141 CPDF_Document* m_pSrcPDFDoc;
142 };
143
CPDF_PageOrganizer(CPDF_Document * pDestPDFDoc,CPDF_Document * pSrcPDFDoc)144 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
145 CPDF_Document* pSrcPDFDoc)
146 : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
147
~CPDF_PageOrganizer()148 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
149
PDFDocInit()150 bool CPDF_PageOrganizer::PDFDocInit() {
151 ASSERT(m_pDestPDFDoc);
152 ASSERT(m_pSrcPDFDoc);
153
154 CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
155 if (!pNewRoot)
156 return false;
157
158 CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
159 if (!pDocInfoDict)
160 return false;
161
162 pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
163
164 CFX_ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
165 if (cbRootType.IsEmpty())
166 pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
167
168 CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
169 CPDF_Dictionary* pNewPages =
170 pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
171 if (!pNewPages) {
172 pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
173 pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc,
174 pNewPages->GetObjNum());
175 }
176
177 CFX_ByteString cbPageType = pNewPages->GetStringFor("Type", "");
178 if (cbPageType.IsEmpty())
179 pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
180
181 if (!pNewPages->GetArrayFor("Kids")) {
182 pNewPages->SetNewFor<CPDF_Number>("Count", 0);
183 pNewPages->SetNewFor<CPDF_Reference>(
184 "Kids", m_pDestPDFDoc,
185 m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
186 }
187
188 return true;
189 }
190
ExportPage(const std::vector<uint16_t> & pageNums,int nIndex)191 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
192 int nIndex) {
193 int curpage = nIndex;
194 auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
195 int nSize = pdfium::CollectionSize<int>(pageNums);
196 for (int i = 0; i < nSize; ++i) {
197 CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
198 CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
199 if (!pSrcPageDict || !pCurPageDict)
200 return false;
201
202 // Clone the page dictionary
203 for (const auto& it : *pSrcPageDict) {
204 const CFX_ByteString& cbSrcKeyStr = it.first;
205 if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
206 continue;
207
208 CPDF_Object* pObj = it.second.get();
209 pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
210 }
211
212 // inheritable item
213 // 1 MediaBox - required
214 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
215 // Search for "CropBox" in the source page dictionary,
216 // if it does not exists, use the default letter size.
217 CPDF_Object* pInheritable =
218 PageDictGetInheritableTag(pSrcPageDict, "CropBox");
219 if (pInheritable) {
220 pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
221 } else {
222 // Make the default size to be letter size (8.5'x11')
223 CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
224 pArray->AddNew<CPDF_Number>(0);
225 pArray->AddNew<CPDF_Number>(0);
226 pArray->AddNew<CPDF_Number>(612);
227 pArray->AddNew<CPDF_Number>(792);
228 }
229 }
230
231 // 2 Resources - required
232 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources"))
233 return false;
234
235 // 3 CropBox - optional
236 CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
237 // 4 Rotate - optional
238 CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
239
240 // Update the reference
241 uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
242 uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
243 (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
244 UpdateReference(pCurPageDict, pObjNumberMap.get());
245 ++curpage;
246 }
247
248 return true;
249 }
250
UpdateReference(CPDF_Object * pObj,ObjectNumberMap * pObjNumberMap)251 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
252 ObjectNumberMap* pObjNumberMap) {
253 switch (pObj->GetType()) {
254 case CPDF_Object::REFERENCE: {
255 CPDF_Reference* pReference = pObj->AsReference();
256 uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
257 if (newobjnum == 0)
258 return false;
259 pReference->SetRef(m_pDestPDFDoc, newobjnum);
260 break;
261 }
262 case CPDF_Object::DICTIONARY: {
263 CPDF_Dictionary* pDict = pObj->AsDictionary();
264 auto it = pDict->begin();
265 while (it != pDict->end()) {
266 const CFX_ByteString& key = it->first;
267 CPDF_Object* pNextObj = it->second.get();
268 ++it;
269 if (key == "Parent" || key == "Prev" || key == "First")
270 continue;
271 if (!pNextObj)
272 return false;
273 if (!UpdateReference(pNextObj, pObjNumberMap))
274 pDict->RemoveFor(key);
275 }
276 break;
277 }
278 case CPDF_Object::ARRAY: {
279 CPDF_Array* pArray = pObj->AsArray();
280 for (size_t i = 0; i < pArray->GetCount(); ++i) {
281 CPDF_Object* pNextObj = pArray->GetObjectAt(i);
282 if (!pNextObj)
283 return false;
284 if (!UpdateReference(pNextObj, pObjNumberMap))
285 return false;
286 }
287 break;
288 }
289 case CPDF_Object::STREAM: {
290 CPDF_Stream* pStream = pObj->AsStream();
291 CPDF_Dictionary* pDict = pStream->GetDict();
292 if (!pDict)
293 return false;
294 if (!UpdateReference(pDict, pObjNumberMap))
295 return false;
296 break;
297 }
298 default:
299 break;
300 }
301
302 return true;
303 }
304
GetNewObjId(ObjectNumberMap * pObjNumberMap,CPDF_Reference * pRef)305 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
306 CPDF_Reference* pRef) {
307 if (!pRef)
308 return 0;
309
310 uint32_t dwObjnum = pRef->GetRefObjNum();
311 uint32_t dwNewObjNum = 0;
312 const auto it = pObjNumberMap->find(dwObjnum);
313 if (it != pObjNumberMap->end())
314 dwNewObjNum = it->second;
315 if (dwNewObjNum)
316 return dwNewObjNum;
317
318 CPDF_Object* pDirect = pRef->GetDirect();
319 if (!pDirect)
320 return 0;
321
322 std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
323 if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
324 if (pDictClone->KeyExist("Type")) {
325 CFX_ByteString strType = pDictClone->GetStringFor("Type");
326 if (!FXSYS_stricmp(strType.c_str(), "Pages"))
327 return 4;
328 if (!FXSYS_stricmp(strType.c_str(), "Page"))
329 return 0;
330 }
331 }
332 CPDF_Object* pUnownedClone =
333 m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
334 dwNewObjNum = pUnownedClone->GetObjNum();
335 (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
336 if (!UpdateReference(pUnownedClone, pObjNumberMap))
337 return 0;
338
339 return dwNewObjNum;
340 }
341
FPDF_ImportPages(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc,FPDF_BYTESTRING pagerange,int index)342 DLLEXPORT FPDF_BOOL STDCALL FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
343 FPDF_DOCUMENT src_doc,
344 FPDF_BYTESTRING pagerange,
345 int index) {
346 CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
347 if (!dest_doc)
348 return false;
349
350 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
351 if (!pSrcDoc)
352 return false;
353
354 std::vector<uint16_t> pageArray;
355 int nCount = pSrcDoc->GetPageCount();
356 if (pagerange) {
357 if (!ParserPageRangeString(pagerange, &pageArray, nCount))
358 return false;
359 } else {
360 for (int i = 1; i <= nCount; ++i) {
361 pageArray.push_back(i);
362 }
363 }
364
365 CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
366 return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
367 }
368
FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc)369 DLLEXPORT FPDF_BOOL STDCALL FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,
370 FPDF_DOCUMENT src_doc) {
371 CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
372 if (!pDstDoc)
373 return false;
374
375 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
376 if (!pSrcDoc)
377 return false;
378
379 CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
380 pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
381 if (!pSrcDict)
382 return false;
383
384 CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
385 if (!pDstDict)
386 return false;
387
388 pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
389 return true;
390 }
391