1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_flatten.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_document.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_number.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfdoc/cpdf_annot.h"
24 #include "fpdfsdk/fsdk_define.h"
25 #include "third_party/base/stl_util.h"
26
// Selects which extreme GetMinMaxValue() computes.
enum FPDF_TYPE {
  MAX,
  MIN,
};
// Names the rectangle edge that GetMinMaxValue() reads.
enum FPDF_VALUE {
  TOP,
  LEFT,
  RIGHT,
  BOTTOM,
};
29
30 namespace {
31
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)32 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
33 constexpr float kMinSize = 0.000001f;
34 if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
35 return false;
36
37 if (rcPage.IsEmpty())
38 return true;
39
40 constexpr float kMinBorderSize = 10.000001f;
41 return rect.left - rcPage.left >= -kMinBorderSize &&
42 rect.right - rcPage.right <= kMinBorderSize &&
43 rect.top - rcPage.top <= kMinBorderSize &&
44 rect.bottom - rcPage.bottom >= -kMinBorderSize;
45 }
46
GetContentsRect(CPDF_Document * pDoc,CPDF_Dictionary * pDict,std::vector<CFX_FloatRect> * pRectArray)47 void GetContentsRect(CPDF_Document* pDoc,
48 CPDF_Dictionary* pDict,
49 std::vector<CFX_FloatRect>* pRectArray) {
50 auto pPDFPage = pdfium::MakeUnique<CPDF_Page>(pDoc, pDict, false);
51 pPDFPage->ParseContent();
52
53 for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
54 CFX_FloatRect rc;
55 rc.left = pPageObject->m_Left;
56 rc.right = pPageObject->m_Right;
57 rc.bottom = pPageObject->m_Bottom;
58 rc.top = pPageObject->m_Top;
59 if (IsValidRect(rc, pDict->GetRectFor("MediaBox")))
60 pRectArray->push_back(rc);
61 }
62 }
63
ParserStream(CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)64 void ParserStream(CPDF_Dictionary* pPageDic,
65 CPDF_Dictionary* pStream,
66 std::vector<CFX_FloatRect>* pRectArray,
67 std::vector<CPDF_Dictionary*>* pObjectArray) {
68 if (!pStream)
69 return;
70 CFX_FloatRect rect;
71 if (pStream->KeyExist("Rect"))
72 rect = pStream->GetRectFor("Rect");
73 else if (pStream->KeyExist("BBox"))
74 rect = pStream->GetRectFor("BBox");
75
76 if (IsValidRect(rect, pPageDic->GetRectFor("MediaBox")))
77 pRectArray->push_back(rect);
78
79 pObjectArray->push_back(pStream);
80 }
81
ParserAnnots(CPDF_Document * pSourceDoc,CPDF_Dictionary * pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)82 int ParserAnnots(CPDF_Document* pSourceDoc,
83 CPDF_Dictionary* pPageDic,
84 std::vector<CFX_FloatRect>* pRectArray,
85 std::vector<CPDF_Dictionary*>* pObjectArray,
86 int nUsage) {
87 if (!pSourceDoc || !pPageDic)
88 return FLATTEN_FAIL;
89
90 GetContentsRect(pSourceDoc, pPageDic, pRectArray);
91 CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
92 if (!pAnnots)
93 return FLATTEN_NOTHINGTODO;
94
95 for (const auto& pAnnot : *pAnnots) {
96 CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnot->GetDirect());
97 if (!pAnnotDic)
98 continue;
99
100 ByteString sSubtype = pAnnotDic->GetStringFor("Subtype");
101 if (sSubtype == "Popup")
102 continue;
103
104 int nAnnotFlag = pAnnotDic->GetIntegerFor("F");
105 if (nAnnotFlag & ANNOTFLAG_HIDDEN)
106 continue;
107
108 bool bParseStream;
109 if (nUsage == FLAT_NORMALDISPLAY)
110 bParseStream = !(nAnnotFlag & ANNOTFLAG_INVISIBLE);
111 else
112 bParseStream = !!(nAnnotFlag & ANNOTFLAG_PRINT);
113 if (bParseStream)
114 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
115 }
116 return FLATTEN_SUCCESS;
117 }
118
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)119 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
120 FPDF_TYPE type,
121 FPDF_VALUE value) {
122 if (array.empty())
123 return 0.0f;
124
125 size_t nRects = array.size();
126 std::vector<float> pArray(nRects);
127 switch (value) {
128 case LEFT:
129 for (size_t i = 0; i < nRects; i++)
130 pArray[i] = array[i].left;
131 break;
132 case TOP:
133 for (size_t i = 0; i < nRects; i++)
134 pArray[i] = array[i].top;
135 break;
136 case RIGHT:
137 for (size_t i = 0; i < nRects; i++)
138 pArray[i] = array[i].right;
139 break;
140 case BOTTOM:
141 for (size_t i = 0; i < nRects; i++)
142 pArray[i] = array[i].bottom;
143 break;
144 default:
145 NOTREACHED();
146 return 0.0f;
147 }
148
149 float fRet = pArray[0];
150 if (type == MAX) {
151 for (size_t i = 1; i < nRects; i++)
152 fRet = std::max(fRet, pArray[i]);
153 } else {
154 for (size_t i = 1; i < nRects; i++)
155 fRet = std::min(fRet, pArray[i]);
156 }
157 return fRet;
158 }
159
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)160 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
161 CFX_FloatRect rcRet;
162
163 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
164 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
165 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
166 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
167
168 return rcRet;
169 }
170
NewIndirectContentsStream(const ByteString & key,CPDF_Document * pDocument)171 uint32_t NewIndirectContentsStream(const ByteString& key,
172 CPDF_Document* pDocument) {
173 CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
174 nullptr, 0,
175 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
176 ByteString sStream =
177 ByteString::Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str());
178 pNewContents->SetData(sStream.raw_str(), sStream.GetLength());
179 return pNewContents->GetObjNum();
180 }
181
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)182 void SetPageContents(const ByteString& key,
183 CPDF_Dictionary* pPage,
184 CPDF_Document* pDocument) {
185 CPDF_Array* pContentsArray = nullptr;
186 CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents");
187 if (!pContentsStream) {
188 pContentsArray = pPage->GetArrayFor("Contents");
189 if (!pContentsArray) {
190 if (!key.IsEmpty()) {
191 pPage->SetNewFor<CPDF_Reference>(
192 "Contents", pDocument, NewIndirectContentsStream(key, pDocument));
193 }
194 return;
195 }
196 }
197 pPage->ConvertToIndirectObjectFor("Contents", pDocument);
198 if (!pContentsArray) {
199 pContentsArray = pDocument->NewIndirect<CPDF_Array>();
200 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
201 pAcc->LoadAllDataFiltered();
202 ByteString sStream = "q\n";
203 ByteString sBody = ByteString(pAcc->GetData(), pAcc->GetSize());
204 sStream = sStream + sBody + "\nQ";
205 pContentsStream->SetDataAndRemoveFilter(sStream.raw_str(),
206 sStream.GetLength());
207 pContentsArray->AddNew<CPDF_Reference>(pDocument,
208 pContentsStream->GetObjNum());
209 pPage->SetNewFor<CPDF_Reference>("Contents", pDocument,
210 pContentsArray->GetObjNum());
211 }
212 if (!key.IsEmpty()) {
213 pContentsArray->AddNew<CPDF_Reference>(
214 pDocument, NewIndirectContentsStream(key, pDocument));
215 }
216 }
217
GetMatrix(CFX_FloatRect rcAnnot,CFX_FloatRect rcStream,const CFX_Matrix & matrix)218 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot,
219 CFX_FloatRect rcStream,
220 const CFX_Matrix& matrix) {
221 if (rcStream.IsEmpty())
222 return CFX_Matrix();
223
224 rcStream = matrix.TransformRect(rcStream);
225 rcStream.Normalize();
226
227 float a = rcAnnot.Width() / rcStream.Width();
228 float d = rcAnnot.Height() / rcStream.Height();
229
230 float e = rcAnnot.left - rcStream.left * a;
231 float f = rcAnnot.bottom - rcStream.bottom * d;
232 return CFX_Matrix(a, 0, 0, d, e, f);
233 }
234
235 } // namespace
236
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)237 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
238 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
239 if (!page)
240 return FLATTEN_FAIL;
241
242 CPDF_Document* pDocument = pPage->m_pDocument.Get();
243 CPDF_Dictionary* pPageDict = pPage->m_pFormDict.Get();
244 if (!pDocument || !pPageDict)
245 return FLATTEN_FAIL;
246
247 std::vector<CPDF_Dictionary*> ObjectArray;
248 std::vector<CFX_FloatRect> RectArray;
249 int iRet =
250 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
251 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
252 return iRet;
253
254 CFX_FloatRect rcOriginalCB;
255 CFX_FloatRect rcMerger = CalculateRect(&RectArray);
256 CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox");
257 if (pPageDict->KeyExist("CropBox"))
258 rcOriginalMB = pPageDict->GetRectFor("CropBox");
259
260 if (rcOriginalMB.IsEmpty())
261 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
262
263 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
264 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
265 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
266 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
267 if (pPageDict->KeyExist("ArtBox"))
268 rcOriginalCB = pPageDict->GetRectFor("ArtBox");
269 else
270 rcOriginalCB = rcOriginalMB;
271
272 if (!rcOriginalMB.IsEmpty()) {
273 CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox");
274 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left);
275 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom);
276 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right);
277 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top);
278 }
279
280 if (!rcOriginalCB.IsEmpty()) {
281 CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox");
282 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left);
283 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom);
284 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right);
285 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top);
286 }
287
288 CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources");
289 if (!pRes)
290 pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources");
291
292 CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
293 nullptr, 0,
294 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
295
296 uint32_t dwObjNum = pNewXObject->GetObjNum();
297 CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
298 if (!pPageXObject)
299 pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
300
301 ByteString key;
302 if (!ObjectArray.empty()) {
303 int i = 0;
304 while (i < INT_MAX) {
305 ByteString sKey = ByteString::Format("FFT%d", i);
306 if (!pPageXObject->KeyExist(sKey)) {
307 key = sKey;
308 break;
309 }
310 ++i;
311 }
312 }
313
314 SetPageContents(key, pPageDict, pDocument);
315
316 CPDF_Dictionary* pNewXORes = nullptr;
317 if (!key.IsEmpty()) {
318 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum);
319 CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
320 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
321 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
322 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
323 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
324 CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox");
325 pNewOXbjectDic->SetRectFor("BBox", rcBBox);
326 }
327
328 for (size_t i = 0; i < ObjectArray.size(); ++i) {
329 CPDF_Dictionary* pAnnotDic = ObjectArray[i];
330 if (!pAnnotDic)
331 continue;
332
333 CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect");
334 rcAnnot.Normalize();
335
336 ByteString sAnnotState = pAnnotDic->GetStringFor("AS");
337 CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP");
338 if (!pAnnotAP)
339 continue;
340
341 CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
342 if (!pAPStream) {
343 CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N");
344 if (!pAPDic)
345 continue;
346
347 if (!sAnnotState.IsEmpty()) {
348 pAPStream = pAPDic->GetStreamFor(sAnnotState);
349 } else {
350 if (pAPDic->GetCount() > 0) {
351 CPDF_Object* pFirstObj = pAPDic->begin()->second.get();
352 if (pFirstObj) {
353 if (pFirstObj->IsReference())
354 pFirstObj = pFirstObj->GetDirect();
355 if (!pFirstObj->IsStream())
356 continue;
357 pAPStream = pFirstObj->AsStream();
358 }
359 }
360 }
361 }
362 if (!pAPStream)
363 continue;
364
365 CPDF_Dictionary* pAPDic = pAPStream->GetDict();
366 CFX_FloatRect rcStream;
367 if (pAPDic->KeyExist("Rect"))
368 rcStream = pAPDic->GetRectFor("Rect");
369 else if (pAPDic->KeyExist("BBox"))
370 rcStream = pAPDic->GetRectFor("BBox");
371
372 if (rcStream.IsEmpty())
373 continue;
374
375 CPDF_Object* pObj = pAPStream;
376 if (pObj->IsInline()) {
377 std::unique_ptr<CPDF_Object> pNew = pObj->Clone();
378 pObj = pNew.get();
379 pDocument->AddIndirectObject(std::move(pNew));
380 }
381
382 CPDF_Dictionary* pObjDic = pObj->GetDict();
383 if (pObjDic) {
384 pObjDic->SetNewFor<CPDF_Name>("Type", "XObject");
385 pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form");
386 }
387
388 CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
389 if (!pXObject)
390 pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
391
392 ByteString sFormName = ByteString::Format("F%d", i);
393 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
394 pObj->GetObjNum());
395
396 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
397 pAcc->LoadAllDataFiltered();
398 ByteString sStream(pAcc->GetData(), pAcc->GetSize());
399 CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix");
400 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
401 sStream += ByteString::Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d,
402 m.e, m.f, sFormName.c_str());
403 pNewXObject->SetDataAndRemoveFilter(sStream.raw_str(), sStream.GetLength());
404 }
405 pPageDict->RemoveFor("Annots");
406 return FLATTEN_SUCCESS;
407 }
408