1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8
9 #include "core/fpdfapi/font/cpdf_type3char.h"
10 #include "core/fpdfapi/page/cpdf_allstates.h"
11 #include "core/fpdfapi/page/cpdf_form.h"
12 #include "core/fpdfapi/page/cpdf_page.h"
13 #include "core/fpdfapi/page/cpdf_pageobject.h"
14 #include "core/fpdfapi/page/cpdf_path.h"
15 #include "core/fpdfapi/parser/cpdf_array.h"
16 #include "core/fpdfapi/parser/cpdf_dictionary.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fxcrt/fx_safe_types.h"
20 #include "core/fxcrt/ifx_pauseindicator.h"
21 #include "third_party/base/ptr_util.h"
22
// Value passed as the last argument of CPDF_StreamContentParser::Parse() for
// each parsing step taken by CPDF_ContentParser::Continue().
// NOTE(review): presumably bounds the amount of work done per step - confirm
// against CPDF_StreamContentParser::Parse().
// A typed constant is used instead of a preprocessor macro so the name obeys
// normal scoping rules and is visible to the debugger.
constexpr int PARSE_STEP_LIMIT = 100;
24
CPDF_ContentParser(CPDF_Page * pPage)25 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
26 : m_InternalStage(STAGE_GETCONTENT), m_pObjectHolder(pPage) {
27 if (!pPage || !pPage->m_pDocument || !pPage->m_pFormDict) {
28 m_bIsDone = true;
29 return;
30 }
31
32 CPDF_Object* pContent = pPage->m_pFormDict->GetDirectObjectFor("Contents");
33 if (!pContent) {
34 m_bIsDone = true;
35 return;
36 }
37 CPDF_Stream* pStream = pContent->AsStream();
38 if (pStream) {
39 m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
40 m_pSingleStream->LoadAllDataFiltered();
41 return;
42 }
43 CPDF_Array* pArray = pContent->AsArray();
44 if (!pArray) {
45 m_bIsDone = true;
46 return;
47 }
48 m_nStreams = pArray->GetCount();
49 if (!m_nStreams) {
50 m_bIsDone = true;
51 return;
52 }
53 m_StreamArray.resize(m_nStreams);
54 }
55
CPDF_ContentParser(CPDF_Form * pForm,CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,std::set<const uint8_t * > * parsedSet)56 CPDF_ContentParser::CPDF_ContentParser(CPDF_Form* pForm,
57 CPDF_AllStates* pGraphicStates,
58 const CFX_Matrix* pParentMatrix,
59 CPDF_Type3Char* pType3Char,
60 std::set<const uint8_t*>* parsedSet)
61 : m_InternalStage(STAGE_PARSE),
62 m_pObjectHolder(pForm),
63 m_pType3Char(pType3Char) {
64 CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrixFor("Matrix");
65 if (pGraphicStates)
66 form_matrix.Concat(pGraphicStates->m_CTM);
67
68 CPDF_Array* pBBox = pForm->m_pFormDict->GetArrayFor("BBox");
69 CFX_FloatRect form_bbox;
70 CPDF_Path ClipPath;
71 if (pBBox) {
72 form_bbox = pBBox->GetRect();
73 ClipPath.Emplace();
74 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right,
75 form_bbox.top);
76 ClipPath.Transform(&form_matrix);
77 if (pParentMatrix)
78 ClipPath.Transform(pParentMatrix);
79
80 form_bbox = form_matrix.TransformRect(form_bbox);
81 if (pParentMatrix)
82 form_bbox = pParentMatrix->TransformRect(form_bbox);
83 }
84
85 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDictFor("Resources");
86 m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
87 pForm->m_pDocument.Get(), pForm->m_pPageResources.Get(),
88 pForm->m_pResources.Get(), pParentMatrix, pForm, pResources, form_bbox,
89 pGraphicStates, parsedSet);
90 m_pParser->GetCurStates()->m_CTM = form_matrix;
91 m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
92 if (ClipPath.HasRef()) {
93 m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
94 true);
95 }
96 if (pForm->m_iTransparency & PDFTRANS_GROUP) {
97 CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState;
98 pState->SetBlendType(FXDIB_BLEND_NORMAL);
99 pState->SetStrokeAlpha(1.0f);
100 pState->SetFillAlpha(1.0f);
101 pState->SetSoftMask(nullptr);
102 }
103 m_pSingleStream =
104 pdfium::MakeRetain<CPDF_StreamAcc>(pForm->m_pFormStream.Get());
105 m_pSingleStream->LoadAllDataFiltered();
106 m_pData.Reset(m_pSingleStream->GetData());
107 m_Size = m_pSingleStream->GetSize();
108 }
109
~CPDF_ContentParser()110 CPDF_ContentParser::~CPDF_ContentParser() {}
111
Continue(IFX_PauseIndicator * pPause)112 bool CPDF_ContentParser::Continue(IFX_PauseIndicator* pPause) {
113 if (m_bIsDone)
114 return false;
115
116 while (!m_bIsDone) {
117 if (m_InternalStage == STAGE_GETCONTENT) {
118 if (m_CurrentOffset == m_nStreams) {
119 if (!m_StreamArray.empty()) {
120 FX_SAFE_UINT32 safeSize = 0;
121 for (const auto& stream : m_StreamArray) {
122 safeSize += stream->GetSize();
123 safeSize += 1;
124 }
125 if (!safeSize.IsValid()) {
126 m_bIsDone = true;
127 return false;
128 }
129 m_Size = safeSize.ValueOrDie();
130 m_pData.Reset(std::unique_ptr<uint8_t, FxFreeDeleter>(
131 FX_Alloc(uint8_t, m_Size)));
132 uint32_t pos = 0;
133 for (const auto& stream : m_StreamArray) {
134 memcpy(m_pData.Get() + pos, stream->GetData(), stream->GetSize());
135 pos += stream->GetSize();
136 m_pData.Get()[pos++] = ' ';
137 }
138 m_StreamArray.clear();
139 } else {
140 m_pData.Reset(m_pSingleStream->GetData());
141 m_Size = m_pSingleStream->GetSize();
142 }
143 m_InternalStage = STAGE_PARSE;
144 m_CurrentOffset = 0;
145 } else {
146 CPDF_Array* pContent =
147 m_pObjectHolder->m_pFormDict->GetArrayFor("Contents");
148 CPDF_Stream* pStreamObj = ToStream(
149 pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
150 m_StreamArray[m_CurrentOffset] =
151 pdfium::MakeRetain<CPDF_StreamAcc>(pStreamObj);
152 m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
153 m_CurrentOffset++;
154 }
155 }
156 if (m_InternalStage == STAGE_PARSE) {
157 if (!m_pParser) {
158 m_parsedSet = pdfium::MakeUnique<std::set<const uint8_t*>>();
159 m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
160 m_pObjectHolder->m_pDocument.Get(),
161 m_pObjectHolder->m_pPageResources.Get(), nullptr, nullptr,
162 m_pObjectHolder.Get(), m_pObjectHolder->m_pResources.Get(),
163 m_pObjectHolder->m_BBox, nullptr, m_parsedSet.get());
164 m_pParser->GetCurStates()->m_ColorState.SetDefault();
165 }
166 if (m_CurrentOffset >= m_Size) {
167 m_InternalStage = STAGE_CHECKCLIP;
168 } else {
169 m_CurrentOffset +=
170 m_pParser->Parse(m_pData.Get() + m_CurrentOffset,
171 m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
172 }
173 }
174 if (m_InternalStage == STAGE_CHECKCLIP) {
175 if (m_pType3Char) {
176 m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
177 m_pParser->GetType3Data());
178 }
179
180 for (auto& pObj : *m_pObjectHolder->GetPageObjectList()) {
181 if (!pObj->m_ClipPath.HasRef())
182 continue;
183 if (pObj->m_ClipPath.GetPathCount() != 1)
184 continue;
185 if (pObj->m_ClipPath.GetTextCount() > 0)
186 continue;
187
188 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
189 if (!ClipPath.IsRect() || pObj->IsShading())
190 continue;
191
192 CFX_PointF point0 = ClipPath.GetPoint(0);
193 CFX_PointF point2 = ClipPath.GetPoint(2);
194 CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
195 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
196 pObj->m_Top);
197 if (old_rect.Contains(obj_rect))
198 pObj->m_ClipPath.SetNull();
199 }
200 m_bIsDone = true;
201 return false;
202 }
203 if (pPause && pPause->NeedToPauseNow())
204 break;
205 }
206 return true;
207 }
208