• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8 
9 #include "constants/page_object.h"
10 #include "core/fpdfapi/font/cpdf_type3char.h"
11 #include "core/fpdfapi/page/cpdf_allstates.h"
12 #include "core/fpdfapi/page/cpdf_form.h"
13 #include "core/fpdfapi/page/cpdf_page.h"
14 #include "core/fpdfapi/page/cpdf_pageobject.h"
15 #include "core/fpdfapi/page/cpdf_path.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_stream.h"
19 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
20 #include "core/fxcrt/fx_safe_types.h"
21 #include "core/fxcrt/pauseindicator_iface.h"
22 #include "core/fxge/render_defines.h"
23 #include "third_party/base/ptr_util.h"
24 
CPDF_ContentParser(CPDF_Page * pPage)25 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
26     : m_CurrentStage(Stage::kGetContent), m_pObjectHolder(pPage) {
27   ASSERT(pPage);
28   if (!pPage->GetDocument()) {
29     m_CurrentStage = Stage::kComplete;
30     return;
31   }
32 
33   CPDF_Object* pContent =
34       pPage->GetDict()->GetDirectObjectFor(pdfium::page_object::kContents);
35   if (!pContent) {
36     HandlePageContentFailure();
37     return;
38   }
39 
40   CPDF_Stream* pStream = pContent->AsStream();
41   if (pStream) {
42     HandlePageContentStream(pStream);
43     return;
44   }
45 
46   CPDF_Array* pArray = pContent->AsArray();
47   if (pArray && HandlePageContentArray(pArray))
48     return;
49 
50   HandlePageContentFailure();
51 }
52 
CPDF_ContentParser(CPDF_Form * pForm,const CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,std::set<const uint8_t * > * pParsedSet)53 CPDF_ContentParser::CPDF_ContentParser(CPDF_Form* pForm,
54                                        const CPDF_AllStates* pGraphicStates,
55                                        const CFX_Matrix* pParentMatrix,
56                                        CPDF_Type3Char* pType3Char,
57                                        std::set<const uint8_t*>* pParsedSet)
58     : m_CurrentStage(Stage::kParse),
59       m_pObjectHolder(pForm),
60       m_pType3Char(pType3Char) {
61   ASSERT(pForm);
62   CFX_Matrix form_matrix = pForm->GetDict()->GetMatrixFor("Matrix");
63   if (pGraphicStates)
64     form_matrix.Concat(pGraphicStates->m_CTM);
65 
66   CPDF_Array* pBBox = pForm->GetDict()->GetArrayFor("BBox");
67   CFX_FloatRect form_bbox;
68   CPDF_Path ClipPath;
69   if (pBBox) {
70     form_bbox = pBBox->GetRect();
71     ClipPath.Emplace();
72     ClipPath.AppendFloatRect(form_bbox);
73     ClipPath.Transform(form_matrix);
74     if (pParentMatrix)
75       ClipPath.Transform(*pParentMatrix);
76 
77     form_bbox = form_matrix.TransformRect(form_bbox);
78     if (pParentMatrix)
79       form_bbox = pParentMatrix->TransformRect(form_bbox);
80   }
81 
82   CPDF_Dictionary* pResources = pForm->GetDict()->GetDictFor("Resources");
83   m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
84       pForm->GetDocument(), pForm->m_pPageResources.Get(),
85       pForm->m_pResources.Get(), pParentMatrix, pForm, pResources, form_bbox,
86       pGraphicStates, pParsedSet);
87   m_pParser->GetCurStates()->m_CTM = form_matrix;
88   m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
89   if (ClipPath.HasRef()) {
90     m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
91                                                      true);
92   }
93   if (pForm->GetTransparency().IsGroup()) {
94     CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState;
95     pState->SetBlendType(BlendMode::kNormal);
96     pState->SetStrokeAlpha(1.0f);
97     pState->SetFillAlpha(1.0f);
98     pState->SetSoftMask(nullptr);
99   }
100   m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pForm->GetStream());
101   m_pSingleStream->LoadAllDataFiltered();
102   m_pData.Reset(m_pSingleStream->GetData());
103   m_Size = m_pSingleStream->GetSize();
104 }
105 
~CPDF_ContentParser()106 CPDF_ContentParser::~CPDF_ContentParser() {}
107 
108 // Returning |true| means that there is more content to be processed and
109 // Continue() should be called again. Returning |false| means that we've
110 // completed the parse and Continue() is complete.
Continue(PauseIndicatorIface * pPause)111 bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
112   while (m_CurrentStage == Stage::kGetContent) {
113     m_CurrentStage = GetContent();
114     if (pPause && pPause->NeedToPauseNow())
115       return true;
116   }
117 
118   if (m_CurrentStage == Stage::kPrepareContent)
119     m_CurrentStage = PrepareContent();
120 
121   while (m_CurrentStage == Stage::kParse) {
122     m_CurrentStage = Parse();
123     if (pPause && pPause->NeedToPauseNow())
124       return true;
125   }
126 
127   if (m_CurrentStage == Stage::kCheckClip)
128     m_CurrentStage = CheckClip();
129 
130   ASSERT(m_CurrentStage == Stage::kComplete);
131   return false;
132 }
133 
GetContent()134 CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
135   ASSERT(m_CurrentStage == Stage::kGetContent);
136   ASSERT(m_pObjectHolder->IsPage());
137   CPDF_Array* pContent =
138       m_pObjectHolder->GetDict()->GetArrayFor(pdfium::page_object::kContents);
139   CPDF_Stream* pStreamObj = ToStream(
140       pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
141   m_StreamArray[m_CurrentOffset] =
142       pdfium::MakeRetain<CPDF_StreamAcc>(pStreamObj);
143   m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
144   m_CurrentOffset++;
145 
146   return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
147                                        : Stage::kGetContent;
148 }
149 
PrepareContent()150 CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
151   m_CurrentOffset = 0;
152 
153   if (m_StreamArray.empty()) {
154     m_pData.Reset(m_pSingleStream->GetData());
155     m_Size = m_pSingleStream->GetSize();
156     return Stage::kParse;
157   }
158 
159   FX_SAFE_UINT32 safeSize = 0;
160   for (const auto& stream : m_StreamArray) {
161     m_StreamSegmentOffsets.push_back(safeSize.ValueOrDie());
162 
163     safeSize += stream->GetSize();
164     safeSize += 1;
165     if (!safeSize.IsValid())
166       return Stage::kComplete;
167   }
168 
169   m_Size = safeSize.ValueOrDie();
170   m_pData.Reset(
171       std::unique_ptr<uint8_t, FxFreeDeleter>(FX_Alloc(uint8_t, m_Size)));
172 
173   uint32_t pos = 0;
174   for (const auto& stream : m_StreamArray) {
175     memcpy(m_pData.Get() + pos, stream->GetData(), stream->GetSize());
176     pos += stream->GetSize();
177     m_pData.Get()[pos++] = ' ';
178   }
179   m_StreamArray.clear();
180 
181   return Stage::kParse;
182 }
183 
Parse()184 CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
185   if (!m_pParser) {
186     m_pParsedSet = pdfium::MakeUnique<std::set<const uint8_t*>>();
187     m_pParser = pdfium::MakeUnique<CPDF_StreamContentParser>(
188         m_pObjectHolder->GetDocument(), m_pObjectHolder->m_pPageResources.Get(),
189         nullptr, nullptr, m_pObjectHolder.Get(),
190         m_pObjectHolder->m_pResources.Get(), m_pObjectHolder->GetBBox(),
191         nullptr, m_pParsedSet.get());
192     m_pParser->GetCurStates()->m_ColorState.SetDefault();
193   }
194   if (m_CurrentOffset >= m_Size)
195     return Stage::kCheckClip;
196 
197   if (m_StreamSegmentOffsets.empty())
198     m_StreamSegmentOffsets.push_back(0);
199 
200   static constexpr uint32_t kParseStepLimit = 100;
201   m_CurrentOffset += m_pParser->Parse(m_pData.Get(), m_Size, m_CurrentOffset,
202                                       kParseStepLimit, m_StreamSegmentOffsets);
203   return Stage::kParse;
204 }
205 
CheckClip()206 CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
207   if (m_pType3Char) {
208     m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
209                                            m_pParser->GetType3Data());
210   }
211 
212   for (auto& pObj : *m_pObjectHolder) {
213     if (!pObj->m_ClipPath.HasRef())
214       continue;
215     if (pObj->m_ClipPath.GetPathCount() != 1)
216       continue;
217     if (pObj->m_ClipPath.GetTextCount() > 0)
218       continue;
219 
220     CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
221     if (!ClipPath.IsRect() || pObj->IsShading())
222       continue;
223 
224     CFX_PointF point0 = ClipPath.GetPoint(0);
225     CFX_PointF point2 = ClipPath.GetPoint(2);
226     CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
227     if (old_rect.Contains(pObj->GetRect()))
228       pObj->m_ClipPath.SetNull();
229   }
230   return Stage::kComplete;
231 }
232 
HandlePageContentStream(CPDF_Stream * pStream)233 void CPDF_ContentParser::HandlePageContentStream(CPDF_Stream* pStream) {
234   m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
235   m_pSingleStream->LoadAllDataFiltered();
236   m_CurrentStage = Stage::kPrepareContent;
237 }
238 
HandlePageContentArray(CPDF_Array * pArray)239 bool CPDF_ContentParser::HandlePageContentArray(CPDF_Array* pArray) {
240   m_nStreams = pArray->size();
241   if (m_nStreams == 0)
242     return false;
243 
244   m_StreamArray.resize(m_nStreams);
245   return true;
246 }
247 
HandlePageContentFailure()248 void CPDF_ContentParser::HandlePageContentFailure() {
249   m_CurrentStage = Stage::kComplete;
250 }
251