1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8
9 #include "constants/page_object.h"
10 #include "core/fpdfapi/font/cpdf_type3char.h"
11 #include "core/fpdfapi/page/cpdf_allstates.h"
12 #include "core/fpdfapi/page/cpdf_form.h"
13 #include "core/fpdfapi/page/cpdf_page.h"
14 #include "core/fpdfapi/page/cpdf_pageobject.h"
15 #include "core/fpdfapi/page/cpdf_path.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_stream.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fxcrt/fx_safe_types.h"
21 #include "core/fxcrt/pauseindicator_iface.h"
22 #include "core/fxge/render_defines.h"
23 #include "third_party/base/ptr_util.h"
24
CPDF_ContentParser(CPDF_Page * pPage)25 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
26 : m_CurrentStage(Stage::kGetContent), m_pObjectHolder(pPage) {
27 ASSERT(pPage);
28 if (!pPage->GetDocument()) {
29 m_CurrentStage = Stage::kComplete;
30 return;
31 }
32
33 CPDF_Object* pContent =
34 pPage->GetDict()->GetDirectObjectFor(pdfium::page_object::kContents);
35 if (!pContent) {
36 HandlePageContentFailure();
37 return;
38 }
39
40 CPDF_Stream* pStream = pContent->AsStream();
41 if (pStream) {
42 HandlePageContentStream(pStream);
43 return;
44 }
45
46 CPDF_Array* pArray = pContent->AsArray();
47 if (pArray && HandlePageContentArray(pArray))
48 return;
49
50 HandlePageContentFailure();
51 }
52
CPDF_ContentParser(CPDF_Form * pForm,const CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,std::set<const uint8_t * > * pParsedSet)53 CPDF_ContentParser::CPDF_ContentParser(CPDF_Form* pForm,
54 const CPDF_AllStates* pGraphicStates,
55 const CFX_Matrix* pParentMatrix,
56 CPDF_Type3Char* pType3Char,
57 std::set<const uint8_t*>* pParsedSet)
58 : m_CurrentStage(Stage::kParse),
59 m_pObjectHolder(pForm),
60 m_pType3Char(pType3Char) {
61 ASSERT(pForm);
62 CFX_Matrix form_matrix = pForm->GetDict()->GetMatrixFor("Matrix");
63 if (pGraphicStates)
64 form_matrix.Concat(pGraphicStates->m_CTM);
65
66 CPDF_Array* pBBox = pForm->GetDict()->GetArrayFor("BBox");
67 CFX_FloatRect form_bbox;
68 CPDF_Path ClipPath;
69 if (pBBox) {
70 form_bbox = pBBox->GetRect();
71 ClipPath.Emplace();
72 ClipPath.AppendFloatRect(form_bbox);
73 ClipPath.Transform(form_matrix);
74 if (pParentMatrix)
75 ClipPath.Transform(*pParentMatrix);
76
77 form_bbox = form_matrix.TransformRect(form_bbox);
78 if (pParentMatrix)
79 form_bbox = pParentMatrix->TransformRect(form_bbox);
80 }
81
82 CPDF_Dictionary* pResources = pForm->GetDict()->GetDictFor("Resources");
83 m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
84 pForm->GetDocument(), pForm->m_pPageResources.Get(),
85 pForm->m_pResources.Get(), pParentMatrix, pForm, pResources, form_bbox,
86 pGraphicStates, pParsedSet);
87 m_pParser->GetCurStates()->m_CTM = form_matrix;
88 m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
89 if (ClipPath.HasRef()) {
90 m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
91 true);
92 }
93 if (pForm->GetTransparency().IsGroup()) {
94 CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState;
95 pState->SetBlendType(BlendMode::kNormal);
96 pState->SetStrokeAlpha(1.0f);
97 pState->SetFillAlpha(1.0f);
98 pState->SetSoftMask(nullptr);
99 }
100 m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pForm->GetStream());
101 m_pSingleStream->LoadAllDataFiltered();
102 m_pData.Reset(m_pSingleStream->GetData());
103 m_Size = m_pSingleStream->GetSize();
104 }
105
~CPDF_ContentParser()106 CPDF_ContentParser::~CPDF_ContentParser() {}
107
108 // Returning |true| means that there is more content to be processed and
109 // Continue() should be called again. Returning |false| means that we've
110 // completed the parse and Continue() is complete.
Continue(PauseIndicatorIface * pPause)111 bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
112 while (m_CurrentStage == Stage::kGetContent) {
113 m_CurrentStage = GetContent();
114 if (pPause && pPause->NeedToPauseNow())
115 return true;
116 }
117
118 if (m_CurrentStage == Stage::kPrepareContent)
119 m_CurrentStage = PrepareContent();
120
121 while (m_CurrentStage == Stage::kParse) {
122 m_CurrentStage = Parse();
123 if (pPause && pPause->NeedToPauseNow())
124 return true;
125 }
126
127 if (m_CurrentStage == Stage::kCheckClip)
128 m_CurrentStage = CheckClip();
129
130 ASSERT(m_CurrentStage == Stage::kComplete);
131 return false;
132 }
133
GetContent()134 CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
135 ASSERT(m_CurrentStage == Stage::kGetContent);
136 ASSERT(m_pObjectHolder->IsPage());
137 CPDF_Array* pContent =
138 m_pObjectHolder->GetDict()->GetArrayFor(pdfium::page_object::kContents);
139 CPDF_Stream* pStreamObj = ToStream(
140 pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
141 m_StreamArray[m_CurrentOffset] =
142 pdfium::MakeRetain<CPDF_StreamAcc>(pStreamObj);
143 m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
144 m_CurrentOffset++;
145
146 return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
147 : Stage::kGetContent;
148 }
149
PrepareContent()150 CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
151 m_CurrentOffset = 0;
152
153 if (m_StreamArray.empty()) {
154 m_pData.Reset(m_pSingleStream->GetData());
155 m_Size = m_pSingleStream->GetSize();
156 return Stage::kParse;
157 }
158
159 FX_SAFE_UINT32 safeSize = 0;
160 for (const auto& stream : m_StreamArray) {
161 m_StreamSegmentOffsets.push_back(safeSize.ValueOrDie());
162
163 safeSize += stream->GetSize();
164 safeSize += 1;
165 if (!safeSize.IsValid())
166 return Stage::kComplete;
167 }
168
169 m_Size = safeSize.ValueOrDie();
170 m_pData.Reset(
171 std::unique_ptr<uint8_t, FxFreeDeleter>(FX_Alloc(uint8_t, m_Size)));
172
173 uint32_t pos = 0;
174 for (const auto& stream : m_StreamArray) {
175 memcpy(m_pData.Get() + pos, stream->GetData(), stream->GetSize());
176 pos += stream->GetSize();
177 m_pData.Get()[pos++] = ' ';
178 }
179 m_StreamArray.clear();
180
181 return Stage::kParse;
182 }
183
Parse()184 CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
185 if (!m_pParser) {
186 m_pParsedSet = pdfium::MakeUnique<std::set<const uint8_t*>>();
187 m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
188 m_pObjectHolder->GetDocument(), m_pObjectHolder->m_pPageResources.Get(),
189 nullptr, nullptr, m_pObjectHolder.Get(),
190 m_pObjectHolder->m_pResources.Get(), m_pObjectHolder->GetBBox(),
191 nullptr, m_pParsedSet.get());
192 m_pParser->GetCurStates()->m_ColorState.SetDefault();
193 }
194 if (m_CurrentOffset >= m_Size)
195 return Stage::kCheckClip;
196
197 if (m_StreamSegmentOffsets.empty())
198 m_StreamSegmentOffsets.push_back(0);
199
200 static constexpr uint32_t kParseStepLimit = 100;
201 m_CurrentOffset += m_pParser->Parse(m_pData.Get(), m_Size, m_CurrentOffset,
202 kParseStepLimit, m_StreamSegmentOffsets);
203 return Stage::kParse;
204 }
205
CheckClip()206 CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
207 if (m_pType3Char) {
208 m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
209 m_pParser->GetType3Data());
210 }
211
212 for (auto& pObj : *m_pObjectHolder) {
213 if (!pObj->m_ClipPath.HasRef())
214 continue;
215 if (pObj->m_ClipPath.GetPathCount() != 1)
216 continue;
217 if (pObj->m_ClipPath.GetTextCount() > 0)
218 continue;
219
220 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
221 if (!ClipPath.IsRect() || pObj->IsShading())
222 continue;
223
224 CFX_PointF point0 = ClipPath.GetPoint(0);
225 CFX_PointF point2 = ClipPath.GetPoint(2);
226 CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
227 if (old_rect.Contains(pObj->GetRect()))
228 pObj->m_ClipPath.SetNull();
229 }
230 return Stage::kComplete;
231 }
232
HandlePageContentStream(CPDF_Stream * pStream)233 void CPDF_ContentParser::HandlePageContentStream(CPDF_Stream* pStream) {
234 m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
235 m_pSingleStream->LoadAllDataFiltered();
236 m_CurrentStage = Stage::kPrepareContent;
237 }
238
HandlePageContentArray(CPDF_Array * pArray)239 bool CPDF_ContentParser::HandlePageContentArray(CPDF_Array* pArray) {
240 m_nStreams = pArray->size();
241 if (m_nStreams == 0)
242 return false;
243
244 m_StreamArray.resize(m_nStreams);
245 return true;
246 }
247
HandlePageContentFailure()248 void CPDF_ContentParser::HandlePageContentFailure() {
249 m_CurrentStage = Stage::kComplete;
250 }
251