• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8 
9 #include <utility>
10 
11 #include "constants/page_object.h"
12 #include "core/fpdfapi/font/cpdf_type3char.h"
13 #include "core/fpdfapi/page/cpdf_allstates.h"
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/page/cpdf_path.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
21 #include "core/fxcrt/check.h"
22 #include "core/fxcrt/check_op.h"
23 #include "core/fxcrt/fixed_size_data_vector.h"
24 #include "core/fxcrt/fx_safe_types.h"
25 #include "core/fxcrt/pauseindicator_iface.h"
26 #include "core/fxcrt/span_util.h"
27 #include "core/fxcrt/stl_util.h"
28 #include "core/fxge/cfx_fillrenderoptions.h"
29 
CPDF_ContentParser(CPDF_Page * pPage)30 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
31     : m_CurrentStage(Stage::kGetContent), m_pPageObjectHolder(pPage) {
32   DCHECK(pPage);
33   if (!pPage->GetDocument()) {
34     m_CurrentStage = Stage::kComplete;
35     return;
36   }
37 
38   RetainPtr<CPDF_Object> pContent =
39       pPage->GetMutableDict()->GetMutableDirectObjectFor(
40           pdfium::page_object::kContents);
41   if (!pContent) {
42     HandlePageContentFailure();
43     return;
44   }
45 
46   const CPDF_Stream* pStream = pContent->AsStream();
47   if (pStream) {
48     HandlePageContentStream(pStream);
49     return;
50   }
51 
52   const CPDF_Array* pArray = pContent->AsArray();
53   if (pArray && HandlePageContentArray(pArray))
54     return;
55 
56   HandlePageContentFailure();
57 }
58 
CPDF_ContentParser(RetainPtr<const CPDF_Stream> pStream,CPDF_PageObjectHolder * pPageObjectHolder,const CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_Form::RecursionState * recursion_state)59 CPDF_ContentParser::CPDF_ContentParser(
60     RetainPtr<const CPDF_Stream> pStream,
61     CPDF_PageObjectHolder* pPageObjectHolder,
62     const CPDF_AllStates* pGraphicStates,
63     const CFX_Matrix* pParentMatrix,
64     CPDF_Type3Char* pType3Char,
65     CPDF_Form::RecursionState* recursion_state)
66     : m_CurrentStage(Stage::kParse),
67       m_pPageObjectHolder(pPageObjectHolder),
68       m_pType3Char(pType3Char) {
69   DCHECK(m_pPageObjectHolder);
70   CFX_Matrix form_matrix =
71       m_pPageObjectHolder->GetDict()->GetMatrixFor("Matrix");
72   if (pGraphicStates)
73     form_matrix.Concat(pGraphicStates->current_transformation_matrix());
74 
75   RetainPtr<const CPDF_Array> pBBox =
76       m_pPageObjectHolder->GetDict()->GetArrayFor("BBox");
77   CFX_FloatRect form_bbox;
78   CPDF_Path ClipPath;
79   if (pBBox) {
80     form_bbox = pBBox->GetRect();
81     ClipPath.Emplace();
82     ClipPath.AppendFloatRect(form_bbox);
83     ClipPath.Transform(form_matrix);
84     if (pParentMatrix)
85       ClipPath.Transform(*pParentMatrix);
86 
87     form_bbox = form_matrix.TransformRect(form_bbox);
88     if (pParentMatrix)
89       form_bbox = pParentMatrix->TransformRect(form_bbox);
90   }
91 
92   RetainPtr<CPDF_Dictionary> pResources =
93       m_pPageObjectHolder->GetMutableDict()->GetMutableDictFor("Resources");
94   m_pParser = std::make_unique<CPDF_StreamContentParser>(
95       m_pPageObjectHolder->GetDocument(),
96       m_pPageObjectHolder->GetMutablePageResources(),
97       m_pPageObjectHolder->GetMutableResources(), pParentMatrix,
98       m_pPageObjectHolder, std::move(pResources), form_bbox, pGraphicStates,
99       recursion_state);
100   m_pParser->GetCurStates()->set_current_transformation_matrix(form_matrix);
101   m_pParser->GetCurStates()->set_parent_matrix(form_matrix);
102   if (ClipPath.HasRef()) {
103     m_pParser->GetCurStates()->mutable_clip_path().AppendPathWithAutoMerge(
104         ClipPath, CFX_FillRenderOptions::FillType::kWinding);
105   }
106   if (m_pPageObjectHolder->GetTransparency().IsGroup()) {
107     CPDF_GeneralState& state =
108         m_pParser->GetCurStates()->mutable_general_state();
109     state.SetBlendType(BlendMode::kNormal);
110     state.SetStrokeAlpha(1.0f);
111     state.SetFillAlpha(1.0f);
112     state.SetSoftMask(nullptr);
113   }
114   m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
115   m_pSingleStream->LoadAllDataFiltered();
116   m_Data = m_pSingleStream->GetSpan();
117 }
118 
119 CPDF_ContentParser::~CPDF_ContentParser() = default;
120 
TakeAllCTMs()121 CPDF_PageObjectHolder::CTMMap CPDF_ContentParser::TakeAllCTMs() {
122   return m_pParser ? m_pParser->TakeAllCTMs() : CPDF_PageObjectHolder::CTMMap();
123 }
124 
125 // Returning |true| means that there is more content to be processed and
126 // Continue() should be called again. Returning |false| means that we've
127 // completed the parse and Continue() is complete.
Continue(PauseIndicatorIface * pPause)128 bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
129   while (m_CurrentStage == Stage::kGetContent) {
130     m_CurrentStage = GetContent();
131     if (pPause && pPause->NeedToPauseNow())
132       return true;
133   }
134 
135   if (m_CurrentStage == Stage::kPrepareContent)
136     m_CurrentStage = PrepareContent();
137 
138   while (m_CurrentStage == Stage::kParse) {
139     m_CurrentStage = Parse();
140     if (pPause && pPause->NeedToPauseNow())
141       return true;
142   }
143 
144   if (m_CurrentStage == Stage::kCheckClip)
145     m_CurrentStage = CheckClip();
146 
147   DCHECK_EQ(m_CurrentStage, Stage::kComplete);
148   return false;
149 }
150 
GetContent()151 CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
152   DCHECK_EQ(m_CurrentStage, Stage::kGetContent);
153   DCHECK(m_pPageObjectHolder->IsPage());
154   RetainPtr<const CPDF_Array> pContent =
155       m_pPageObjectHolder->GetDict()->GetArrayFor(
156           pdfium::page_object::kContents);
157   RetainPtr<const CPDF_Stream> pStreamObj = ToStream(
158       pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
159   m_StreamArray[m_CurrentOffset] =
160       pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStreamObj));
161   m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
162   m_CurrentOffset++;
163 
164   return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
165                                        : Stage::kGetContent;
166 }
167 
PrepareContent()168 CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
169   m_CurrentOffset = 0;
170 
171   if (m_StreamArray.empty()) {
172     m_Data = m_pSingleStream->GetSpan();
173     return Stage::kParse;
174   }
175 
176   FX_SAFE_UINT32 safe_size = 0;
177   for (const auto& stream : m_StreamArray) {
178     m_StreamSegmentOffsets.push_back(safe_size.ValueOrDie());
179     safe_size += stream->GetSize();
180     safe_size += 1;
181     if (!safe_size.IsValid())
182       return Stage::kComplete;
183   }
184 
185   const size_t buffer_size = safe_size.ValueOrDie();
186   auto buffer = FixedSizeDataVector<uint8_t>::TryZeroed(buffer_size);
187   if (buffer.empty()) {
188     m_Data.emplace<pdfium::raw_span<const uint8_t>>();
189     return Stage::kComplete;
190   }
191 
192   auto data_span = buffer.span();
193   for (const auto& stream : m_StreamArray) {
194     data_span = fxcrt::spancpy(data_span, stream->GetSpan());
195     data_span.front() = ' ';
196     data_span = data_span.subspan(1);
197   }
198   m_StreamArray.clear();
199   m_Data = std::move(buffer);
200   return Stage::kParse;
201 }
202 
Parse()203 CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
204   if (!m_pParser) {
205     m_RecursionState.parsed_set.clear();
206     m_pParser = std::make_unique<CPDF_StreamContentParser>(
207         m_pPageObjectHolder->GetDocument(),
208         m_pPageObjectHolder->GetMutablePageResources(), nullptr, nullptr,
209         m_pPageObjectHolder, m_pPageObjectHolder->GetMutableResources(),
210         m_pPageObjectHolder->GetBBox(), nullptr, &m_RecursionState);
211     m_pParser->GetCurStates()->mutable_color_state().SetDefault();
212   }
213   if (m_CurrentOffset >= GetData().size())
214     return Stage::kCheckClip;
215 
216   if (m_StreamSegmentOffsets.empty())
217     m_StreamSegmentOffsets.push_back(0);
218 
219   static constexpr uint32_t kParseStepLimit = 100;
220   m_CurrentOffset += m_pParser->Parse(GetData(), m_CurrentOffset,
221                                       kParseStepLimit, m_StreamSegmentOffsets);
222   return Stage::kParse;
223 }
224 
CheckClip()225 CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
226   if (m_pType3Char) {
227     m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
228                                            m_pParser->GetType3Data());
229   }
230 
231   for (auto& pObj : *m_pPageObjectHolder) {
232     if (!pObj->IsActive()) {
233       continue;
234     }
235     CPDF_ClipPath& clip_path = pObj->mutable_clip_path();
236     if (!clip_path.HasRef()) {
237       continue;
238     }
239     if (clip_path.GetPathCount() != 1) {
240       continue;
241     }
242     if (clip_path.GetTextCount() > 0) {
243       continue;
244     }
245 
246     CPDF_Path path = clip_path.GetPath(0);
247     if (!path.IsRect() || pObj->IsShading()) {
248       continue;
249     }
250 
251     CFX_PointF point0 = path.GetPoint(0);
252     CFX_PointF point2 = path.GetPoint(2);
253     CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
254     if (old_rect.Contains(pObj->GetRect()))
255       clip_path.SetNull();
256   }
257   return Stage::kComplete;
258 }
259 
HandlePageContentStream(const CPDF_Stream * pStream)260 void CPDF_ContentParser::HandlePageContentStream(const CPDF_Stream* pStream) {
261   m_pSingleStream =
262       pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pStream));
263   m_pSingleStream->LoadAllDataFiltered();
264   m_CurrentStage = Stage::kPrepareContent;
265 }
266 
HandlePageContentArray(const CPDF_Array * pArray)267 bool CPDF_ContentParser::HandlePageContentArray(const CPDF_Array* pArray) {
268   m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray);
269   if (m_nStreams == 0)
270     return false;
271 
272   m_StreamArray.resize(m_nStreams);
273   return true;
274 }
275 
HandlePageContentFailure()276 void CPDF_ContentParser::HandlePageContentFailure() {
277   m_CurrentStage = Stage::kComplete;
278 }
279 
GetData() const280 pdfium::span<const uint8_t> CPDF_ContentParser::GetData() const {
281   if (is_owned()) {
282     return absl::get<FixedSizeDataVector<uint8_t>>(m_Data).span();
283   }
284   return absl::get<pdfium::raw_span<const uint8_t>>(m_Data);
285 }
286