1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_flatten.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_document.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_number.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfdoc/cpdf_annot.h"
24 #include "fpdfsdk/fsdk_define.h"
25 #include "third_party/base/stl_util.h"
26
// Selects whether GetMinMaxValue() reports the largest or the smallest value
// found among the inspected rectangle edges.
enum FPDF_TYPE {
  MAX,
  MIN,
};

// Selects which edge of each rectangle GetMinMaxValue() inspects.
enum FPDF_VALUE {
  TOP,
  LEFT,
  RIGHT,
  BOTTOM,
};
29
30 namespace {
31
IsValiableRect(CFX_FloatRect rect,CFX_FloatRect rcPage)32 bool IsValiableRect(CFX_FloatRect rect, CFX_FloatRect rcPage) {
33 if (rect.left - rect.right > 0.000001f || rect.bottom - rect.top > 0.000001f)
34 return false;
35
36 if (rect.left == 0.0f && rect.top == 0.0f && rect.right == 0.0f &&
37 rect.bottom == 0.0f)
38 return false;
39
40 if (!rcPage.IsEmpty()) {
41 if (rect.left - rcPage.left < -10.000001f ||
42 rect.right - rcPage.right > 10.000001f ||
43 rect.top - rcPage.top > 10.000001f ||
44 rect.bottom - rcPage.bottom < -10.000001f)
45 return false;
46 }
47
48 return true;
49 }
50
GetContentsRect(CPDF_Document * pDoc,CPDF_Dictionary * pDict,std::vector<CFX_FloatRect> * pRectArray)51 void GetContentsRect(CPDF_Document* pDoc,
52 CPDF_Dictionary* pDict,
53 std::vector<CFX_FloatRect>* pRectArray) {
54 std::unique_ptr<CPDF_Page> pPDFPage(new CPDF_Page(pDoc, pDict, false));
55 pPDFPage->ParseContent();
56
57 for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
58 CFX_FloatRect rc;
59 rc.left = pPageObject->m_Left;
60 rc.right = pPageObject->m_Right;
61 rc.bottom = pPageObject->m_Bottom;
62 rc.top = pPageObject->m_Top;
63 if (IsValiableRect(rc, pDict->GetRectFor("MediaBox")))
64 pRectArray->push_back(rc);
65 }
66 }
67
ParserStream(CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)68 void ParserStream(CPDF_Dictionary* pPageDic,
69 CPDF_Dictionary* pStream,
70 std::vector<CFX_FloatRect>* pRectArray,
71 std::vector<CPDF_Dictionary*>* pObjectArray) {
72 if (!pStream)
73 return;
74 CFX_FloatRect rect;
75 if (pStream->KeyExist("Rect"))
76 rect = pStream->GetRectFor("Rect");
77 else if (pStream->KeyExist("BBox"))
78 rect = pStream->GetRectFor("BBox");
79
80 if (IsValiableRect(rect, pPageDic->GetRectFor("MediaBox")))
81 pRectArray->push_back(rect);
82
83 pObjectArray->push_back(pStream);
84 }
85
ParserAnnots(CPDF_Document * pSourceDoc,CPDF_Dictionary * pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)86 int ParserAnnots(CPDF_Document* pSourceDoc,
87 CPDF_Dictionary* pPageDic,
88 std::vector<CFX_FloatRect>* pRectArray,
89 std::vector<CPDF_Dictionary*>* pObjectArray,
90 int nUsage) {
91 if (!pSourceDoc || !pPageDic)
92 return FLATTEN_FAIL;
93
94 GetContentsRect(pSourceDoc, pPageDic, pRectArray);
95 CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
96 if (!pAnnots)
97 return FLATTEN_NOTHINGTODO;
98
99 uint32_t dwSize = pAnnots->GetCount();
100 for (int i = 0; i < (int)dwSize; i++) {
101 CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnots->GetDirectObjectAt(i));
102 if (!pAnnotDic)
103 continue;
104
105 CFX_ByteString sSubtype = pAnnotDic->GetStringFor("Subtype");
106 if (sSubtype == "Popup")
107 continue;
108
109 int nAnnotFlag = pAnnotDic->GetIntegerFor("F");
110 if (nAnnotFlag & ANNOTFLAG_HIDDEN)
111 continue;
112
113 if (nUsage == FLAT_NORMALDISPLAY) {
114 if (nAnnotFlag & ANNOTFLAG_INVISIBLE)
115 continue;
116
117 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
118 } else {
119 if (nAnnotFlag & ANNOTFLAG_PRINT)
120 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
121 }
122 }
123 return FLATTEN_SUCCESS;
124 }
125
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)126 FX_FLOAT GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
127 FPDF_TYPE type,
128 FPDF_VALUE value) {
129 size_t nRects = array.size();
130 if (nRects <= 0)
131 return 0.0f;
132
133 std::vector<FX_FLOAT> pArray(nRects);
134 switch (value) {
135 case LEFT:
136 for (size_t i = 0; i < nRects; i++)
137 pArray[i] = array[i].left;
138 break;
139 case TOP:
140 for (size_t i = 0; i < nRects; i++)
141 pArray[i] = array[i].top;
142 break;
143 case RIGHT:
144 for (size_t i = 0; i < nRects; i++)
145 pArray[i] = array[i].right;
146 break;
147 case BOTTOM:
148 for (size_t i = 0; i < nRects; i++)
149 pArray[i] = array[i].bottom;
150 break;
151 default:
152 // Not reachable.
153 return 0.0f;
154 }
155
156 FX_FLOAT fRet = pArray[0];
157 if (type == MAX) {
158 for (size_t i = 1; i < nRects; i++)
159 fRet = std::max(fRet, pArray[i]);
160 } else {
161 for (size_t i = 1; i < nRects; i++)
162 fRet = std::min(fRet, pArray[i]);
163 }
164 return fRet;
165 }
166
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)167 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
168 CFX_FloatRect rcRet;
169
170 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
171 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
172 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
173 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
174
175 return rcRet;
176 }
177
NewIndirectContentsStream(const CFX_ByteString & key,CPDF_Document * pDocument)178 uint32_t NewIndirectContentsStream(const CFX_ByteString& key,
179 CPDF_Document* pDocument) {
180 CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
181 nullptr, 0,
182 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
183 CFX_ByteString sStream;
184 sStream.Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str());
185 pNewContents->SetData(sStream.raw_str(), sStream.GetLength());
186 return pNewContents->GetObjNum();
187 }
188
SetPageContents(const CFX_ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)189 void SetPageContents(const CFX_ByteString& key,
190 CPDF_Dictionary* pPage,
191 CPDF_Document* pDocument) {
192 CPDF_Array* pContentsArray = nullptr;
193 CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents");
194 if (!pContentsStream) {
195 pContentsArray = pPage->GetArrayFor("Contents");
196 if (!pContentsArray) {
197 if (!key.IsEmpty()) {
198 pPage->SetNewFor<CPDF_Reference>(
199 "Contents", pDocument, NewIndirectContentsStream(key, pDocument));
200 }
201 return;
202 }
203 }
204 pPage->ConvertToIndirectObjectFor("Contents", pDocument);
205 if (!pContentsArray) {
206 pContentsArray = pDocument->NewIndirect<CPDF_Array>();
207 CPDF_StreamAcc acc;
208 acc.LoadAllData(pContentsStream);
209 CFX_ByteString sStream = "q\n";
210 CFX_ByteString sBody =
211 CFX_ByteString((const FX_CHAR*)acc.GetData(), acc.GetSize());
212 sStream = sStream + sBody + "\nQ";
213 pContentsStream->SetData(sStream.raw_str(), sStream.GetLength());
214 pContentsArray->AddNew<CPDF_Reference>(pDocument,
215 pContentsStream->GetObjNum());
216 pPage->SetNewFor<CPDF_Reference>("Contents", pDocument,
217 pContentsArray->GetObjNum());
218 }
219 if (!key.IsEmpty()) {
220 pContentsArray->AddNew<CPDF_Reference>(
221 pDocument, NewIndirectContentsStream(key, pDocument));
222 }
223 }
224
GetMatrix(CFX_FloatRect rcAnnot,CFX_FloatRect rcStream,const CFX_Matrix & matrix)225 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot,
226 CFX_FloatRect rcStream,
227 const CFX_Matrix& matrix) {
228 if (rcStream.IsEmpty())
229 return CFX_Matrix();
230
231 matrix.TransformRect(rcStream);
232 rcStream.Normalize();
233
234 FX_FLOAT a = rcAnnot.Width() / rcStream.Width();
235 FX_FLOAT d = rcAnnot.Height() / rcStream.Height();
236
237 FX_FLOAT e = rcAnnot.left - rcStream.left * a;
238 FX_FLOAT f = rcAnnot.bottom - rcStream.bottom * d;
239 return CFX_Matrix(a, 0, 0, d, e, f);
240 }
241
242 } // namespace
243
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)244 DLLEXPORT int STDCALL FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
245 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
246 if (!page)
247 return FLATTEN_FAIL;
248
249 CPDF_Document* pDocument = pPage->m_pDocument;
250 CPDF_Dictionary* pPageDict = pPage->m_pFormDict;
251 if (!pDocument || !pPageDict)
252 return FLATTEN_FAIL;
253
254 std::vector<CPDF_Dictionary*> ObjectArray;
255 std::vector<CFX_FloatRect> RectArray;
256 int iRet =
257 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
258 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
259 return iRet;
260
261 CFX_FloatRect rcOriginalCB;
262 CFX_FloatRect rcMerger = CalculateRect(&RectArray);
263 CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox");
264 if (pPageDict->KeyExist("CropBox"))
265 rcOriginalMB = pPageDict->GetRectFor("CropBox");
266
267 if (rcOriginalMB.IsEmpty())
268 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
269
270 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
271 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
272 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
273 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
274 if (pPageDict->KeyExist("ArtBox"))
275 rcOriginalCB = pPageDict->GetRectFor("ArtBox");
276 else
277 rcOriginalCB = rcOriginalMB;
278
279 if (!rcOriginalMB.IsEmpty()) {
280 CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox");
281 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left);
282 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom);
283 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right);
284 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top);
285 }
286
287 if (!rcOriginalCB.IsEmpty()) {
288 CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox");
289 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left);
290 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom);
291 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right);
292 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top);
293 }
294
295 CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources");
296 if (!pRes)
297 pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources");
298
299 CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
300 nullptr, 0,
301 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
302
303 uint32_t dwObjNum = pNewXObject->GetObjNum();
304 CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
305 if (!pPageXObject)
306 pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
307
308 CFX_ByteString key = "";
309 int nStreams = pdfium::CollectionSize<int>(ObjectArray);
310 if (nStreams > 0) {
311 for (int iKey = 0; /*iKey < 100*/; iKey++) {
312 char sExtend[5] = {};
313 FXSYS_itoa(iKey, sExtend, 10);
314 key = CFX_ByteString("FFT") + CFX_ByteString(sExtend);
315 if (!pPageXObject->KeyExist(key))
316 break;
317 }
318 }
319
320 SetPageContents(key, pPageDict, pDocument);
321
322 CPDF_Dictionary* pNewXORes = nullptr;
323 if (!key.IsEmpty()) {
324 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum);
325 CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
326 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
327 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
328 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
329 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
330 pNewOXbjectDic->SetNewFor<CPDF_Name>("Name", "FRM");
331 CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox");
332 pNewOXbjectDic->SetRectFor("BBox", rcBBox);
333 }
334
335 for (int i = 0; i < nStreams; i++) {
336 CPDF_Dictionary* pAnnotDic = ObjectArray[i];
337 if (!pAnnotDic)
338 continue;
339
340 CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect");
341 rcAnnot.Normalize();
342
343 CFX_ByteString sAnnotState = pAnnotDic->GetStringFor("AS");
344 CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP");
345 if (!pAnnotAP)
346 continue;
347
348 CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
349 if (!pAPStream) {
350 CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N");
351 if (!pAPDic)
352 continue;
353
354 if (!sAnnotState.IsEmpty()) {
355 pAPStream = pAPDic->GetStreamFor(sAnnotState);
356 } else {
357 auto it = pAPDic->begin();
358 if (it != pAPDic->end()) {
359 CPDF_Object* pFirstObj = it->second.get();
360 if (pFirstObj) {
361 if (pFirstObj->IsReference())
362 pFirstObj = pFirstObj->GetDirect();
363 if (!pFirstObj->IsStream())
364 continue;
365 pAPStream = pFirstObj->AsStream();
366 }
367 }
368 }
369 }
370 if (!pAPStream)
371 continue;
372
373 CPDF_Dictionary* pAPDic = pAPStream->GetDict();
374 CFX_FloatRect rcStream;
375 if (pAPDic->KeyExist("Rect"))
376 rcStream = pAPDic->GetRectFor("Rect");
377 else if (pAPDic->KeyExist("BBox"))
378 rcStream = pAPDic->GetRectFor("BBox");
379
380 if (rcStream.IsEmpty())
381 continue;
382
383 CPDF_Object* pObj = pAPStream;
384 if (pObj->IsInline()) {
385 std::unique_ptr<CPDF_Object> pNew = pObj->Clone();
386 pObj = pNew.get();
387 pDocument->AddIndirectObject(std::move(pNew));
388 }
389
390 CPDF_Dictionary* pObjDic = pObj->GetDict();
391 if (pObjDic) {
392 pObjDic->SetNewFor<CPDF_Name>("Type", "XObject");
393 pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form");
394 }
395
396 CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
397 if (!pXObject)
398 pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
399
400 CFX_ByteString sFormName;
401 sFormName.Format("F%d", i);
402 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
403 pObj->GetObjNum());
404
405 CPDF_StreamAcc acc;
406 acc.LoadAllData(pNewXObject);
407
408 const uint8_t* pData = acc.GetData();
409 CFX_ByteString sStream(pData, acc.GetSize());
410 CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix");
411 if (matrix.IsIdentity()) {
412 matrix.a = 1.0f;
413 matrix.b = 0.0f;
414 matrix.c = 0.0f;
415 matrix.d = 1.0f;
416 matrix.e = 0.0f;
417 matrix.f = 0.0f;
418 }
419
420 CFX_ByteString sTemp;
421 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
422 sTemp.Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d, m.e, m.f,
423 sFormName.c_str());
424 sStream += sTemp;
425 pNewXObject->SetData(sStream.raw_str(), sStream.GetLength());
426 }
427 pPageDict->RemoveFor("Annots");
428 return FLATTEN_SUCCESS;
429 }
430