1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8
9 #include <utility>
10
11 #include "constants/page_object.h"
12 #include "core/fpdfapi/font/cpdf_type3char.h"
13 #include "core/fpdfapi/page/cpdf_allstates.h"
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/page/cpdf_path.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
21 #include "core/fxcrt/fixed_try_alloc_zeroed_data_vector.h"
22 #include "core/fxcrt/fx_safe_types.h"
23 #include "core/fxcrt/pauseindicator_iface.h"
24 #include "core/fxcrt/span_util.h"
25 #include "core/fxcrt/stl_util.h"
26 #include "core/fxge/cfx_fillrenderoptions.h"
27 #include "third_party/base/check.h"
28 #include "third_party/base/check_op.h"
29
CPDF_ContentParser(CPDF_Page * pPage)30 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
31 : m_CurrentStage(Stage::kGetContent), m_pPageObjectHolder(pPage) {
32 DCHECK(pPage);
33 if (!pPage->GetDocument()) {
34 m_CurrentStage = Stage::kComplete;
35 return;
36 }
37
38 RetainPtr<CPDF_Object> pContent =
39 pPage->GetMutableDict()->GetMutableDirectObjectFor(
40 pdfium::page_object::kContents);
41 if (!pContent) {
42 HandlePageContentFailure();
43 return;
44 }
45
46 const CPDF_Stream* pStream = pContent->AsStream();
47 if (pStream) {
48 HandlePageContentStream(pStream);
49 return;
50 }
51
52 const CPDF_Array* pArray = pContent->AsArray();
53 if (pArray && HandlePageContentArray(pArray))
54 return;
55
56 HandlePageContentFailure();
57 }
58
CPDF_ContentParser(RetainPtr<const CPDF_Stream> pStream,CPDF_PageObjectHolder * pPageObjectHolder,const CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,std::set<const uint8_t * > * pParsedSet)59 CPDF_ContentParser::CPDF_ContentParser(RetainPtr<const CPDF_Stream> pStream,
60 CPDF_PageObjectHolder* pPageObjectHolder,
61 const CPDF_AllStates* pGraphicStates,
62 const CFX_Matrix* pParentMatrix,
63 CPDF_Type3Char* pType3Char,
64 std::set<const uint8_t*>* pParsedSet)
65 : m_CurrentStage(Stage::kParse),
66 m_pPageObjectHolder(pPageObjectHolder),
67 m_pType3Char(pType3Char) {
68 DCHECK(m_pPageObjectHolder);
69 CFX_Matrix form_matrix =
70 m_pPageObjectHolder->GetDict()->GetMatrixFor("Matrix");
71 if (pGraphicStates)
72 form_matrix.Concat(pGraphicStates->m_CTM);
73
74 RetainPtr<const CPDF_Array> pBBox =
75 m_pPageObjectHolder->GetDict()->GetArrayFor("BBox");
76 CFX_FloatRect form_bbox;
77 CPDF_Path ClipPath;
78 if (pBBox) {
79 form_bbox = pBBox->GetRect();
80 ClipPath.Emplace();
81 ClipPath.AppendFloatRect(form_bbox);
82 ClipPath.Transform(form_matrix);
83 if (pParentMatrix)
84 ClipPath.Transform(*pParentMatrix);
85
86 form_bbox = form_matrix.TransformRect(form_bbox);
87 if (pParentMatrix)
88 form_bbox = pParentMatrix->TransformRect(form_bbox);
89 }
90
91 RetainPtr<CPDF_Dictionary> pResources =
92 m_pPageObjectHolder->GetMutableDict()->GetMutableDictFor("Resources");
93 m_pParser = std::make_unique<CPDF_StreamContentParser>(
94 m_pPageObjectHolder->GetDocument(),
95 m_pPageObjectHolder->GetMutablePageResources(),
96 m_pPageObjectHolder->GetMutableResources(), pParentMatrix,
97 m_pPageObjectHolder, std::move(pResources), form_bbox, pGraphicStates,
98 pParsedSet);
99 m_pParser->GetCurStates()->m_CTM = form_matrix;
100 m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
101 if (ClipPath.HasRef()) {
102 m_pParser->GetCurStates()->m_ClipPath.AppendPathWithAutoMerge(
103 ClipPath, CFX_FillRenderOptions::FillType::kWinding);
104 }
105 if (m_pPageObjectHolder->GetTransparency().IsGroup()) {
106 CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState;
107 pState->SetBlendType(BlendMode::kNormal);
108 pState->SetStrokeAlpha(1.0f);
109 pState->SetFillAlpha(1.0f);
110 pState->SetSoftMask(nullptr);
111 }
112 m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
113 m_pSingleStream->LoadAllDataFiltered();
114 m_Data = m_pSingleStream->GetSpan();
115 }
116
117 CPDF_ContentParser::~CPDF_ContentParser() = default;
118
119 // Returning |true| means that there is more content to be processed and
120 // Continue() should be called again. Returning |false| means that we've
121 // completed the parse and Continue() is complete.
Continue(PauseIndicatorIface * pPause)122 bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
123 while (m_CurrentStage == Stage::kGetContent) {
124 m_CurrentStage = GetContent();
125 if (pPause && pPause->NeedToPauseNow())
126 return true;
127 }
128
129 if (m_CurrentStage == Stage::kPrepareContent)
130 m_CurrentStage = PrepareContent();
131
132 while (m_CurrentStage == Stage::kParse) {
133 m_CurrentStage = Parse();
134 if (pPause && pPause->NeedToPauseNow())
135 return true;
136 }
137
138 if (m_CurrentStage == Stage::kCheckClip)
139 m_CurrentStage = CheckClip();
140
141 DCHECK_EQ(m_CurrentStage, Stage::kComplete);
142 return false;
143 }
144
GetContent()145 CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
146 DCHECK_EQ(m_CurrentStage, Stage::kGetContent);
147 DCHECK(m_pPageObjectHolder->IsPage());
148 RetainPtr<const CPDF_Array> pContent =
149 m_pPageObjectHolder->GetDict()->GetArrayFor(
150 pdfium::page_object::kContents);
151 RetainPtr<const CPDF_Stream> pStreamObj = ToStream(
152 pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
153 m_StreamArray[m_CurrentOffset] =
154 pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStreamObj));
155 m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
156 m_CurrentOffset++;
157
158 return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
159 : Stage::kGetContent;
160 }
161
PrepareContent()162 CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
163 m_CurrentOffset = 0;
164
165 if (m_StreamArray.empty()) {
166 m_Data = m_pSingleStream->GetSpan();
167 return Stage::kParse;
168 }
169
170 FX_SAFE_UINT32 safe_size = 0;
171 for (const auto& stream : m_StreamArray) {
172 m_StreamSegmentOffsets.push_back(safe_size.ValueOrDie());
173 safe_size += stream->GetSize();
174 safe_size += 1;
175 if (!safe_size.IsValid())
176 return Stage::kComplete;
177 }
178
179 const size_t buffer_size = safe_size.ValueOrDie();
180 FixedTryAllocZeroedDataVector<uint8_t> buffer(buffer_size);
181 if (buffer.empty()) {
182 m_Data.emplace<pdfium::span<const uint8_t>>();
183 return Stage::kComplete;
184 }
185
186 size_t pos = 0;
187 auto data_span = buffer.writable_span();
188 for (const auto& stream : m_StreamArray) {
189 fxcrt::spancpy(data_span.subspan(pos), stream->GetSpan());
190 pos += stream->GetSize();
191 data_span[pos++] = ' ';
192 }
193 m_StreamArray.clear();
194 m_Data = std::move(buffer);
195 return Stage::kParse;
196 }
197
Parse()198 CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
199 if (!m_pParser) {
200 m_ParsedSet.clear();
201 m_pParser = std::make_unique<CPDF_StreamContentParser>(
202 m_pPageObjectHolder->GetDocument(),
203 m_pPageObjectHolder->GetMutablePageResources(), nullptr, nullptr,
204 m_pPageObjectHolder, m_pPageObjectHolder->GetMutableResources(),
205 m_pPageObjectHolder->GetBBox(), nullptr, &m_ParsedSet);
206 m_pParser->GetCurStates()->m_ColorState.SetDefault();
207 }
208 if (m_CurrentOffset >= GetData().size())
209 return Stage::kCheckClip;
210
211 if (m_StreamSegmentOffsets.empty())
212 m_StreamSegmentOffsets.push_back(0);
213
214 static constexpr uint32_t kParseStepLimit = 100;
215 m_CurrentOffset += m_pParser->Parse(GetData(), m_CurrentOffset,
216 kParseStepLimit, m_StreamSegmentOffsets);
217 return Stage::kParse;
218 }
219
CheckClip()220 CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
221 if (m_pType3Char) {
222 m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
223 m_pParser->GetType3Data());
224 }
225
226 for (auto& pObj : *m_pPageObjectHolder) {
227 if (!pObj->m_ClipPath.HasRef())
228 continue;
229 if (pObj->m_ClipPath.GetPathCount() != 1)
230 continue;
231 if (pObj->m_ClipPath.GetTextCount() > 0)
232 continue;
233
234 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
235 if (!ClipPath.IsRect() || pObj->IsShading())
236 continue;
237
238 CFX_PointF point0 = ClipPath.GetPoint(0);
239 CFX_PointF point2 = ClipPath.GetPoint(2);
240 CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
241 if (old_rect.Contains(pObj->GetRect()))
242 pObj->m_ClipPath.SetNull();
243 }
244 return Stage::kComplete;
245 }
246
HandlePageContentStream(const CPDF_Stream * pStream)247 void CPDF_ContentParser::HandlePageContentStream(const CPDF_Stream* pStream) {
248 m_pSingleStream =
249 pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pStream));
250 m_pSingleStream->LoadAllDataFiltered();
251 m_CurrentStage = Stage::kPrepareContent;
252 }
253
HandlePageContentArray(const CPDF_Array * pArray)254 bool CPDF_ContentParser::HandlePageContentArray(const CPDF_Array* pArray) {
255 m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray);
256 if (m_nStreams == 0)
257 return false;
258
259 m_StreamArray.resize(m_nStreams);
260 return true;
261 }
262
HandlePageContentFailure()263 void CPDF_ContentParser::HandlePageContentFailure() {
264 m_CurrentStage = Stage::kComplete;
265 }
266
GetData() const267 pdfium::span<const uint8_t> CPDF_ContentParser::GetData() const {
268 if (is_owned())
269 return absl::get<FixedTryAllocZeroedDataVector<uint8_t>>(m_Data).span();
270 return absl::get<pdfium::span<const uint8_t>>(m_Data);
271 }
272