• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h"
8 
9 #include <map>
10 #include <memory>
11 #include <optional>
12 #include <set>
13 #include <sstream>
14 #include <tuple>
15 #include <utility>
16 
17 #include "constants/page_object.h"
18 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
19 #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
20 #include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
21 #include "core/fpdfapi/font/cpdf_truetypefont.h"
22 #include "core/fpdfapi/font/cpdf_type1font.h"
23 #include "core/fpdfapi/page/cpdf_contentmarks.h"
24 #include "core/fpdfapi/page/cpdf_docpagedata.h"
25 #include "core/fpdfapi/page/cpdf_form.h"
26 #include "core/fpdfapi/page/cpdf_formobject.h"
27 #include "core/fpdfapi/page/cpdf_image.h"
28 #include "core/fpdfapi/page/cpdf_imageobject.h"
29 #include "core/fpdfapi/page/cpdf_page.h"
30 #include "core/fpdfapi/page/cpdf_path.h"
31 #include "core/fpdfapi/page/cpdf_pathobject.h"
32 #include "core/fpdfapi/page/cpdf_textobject.h"
33 #include "core/fpdfapi/parser/cpdf_array.h"
34 #include "core/fpdfapi/parser/cpdf_dictionary.h"
35 #include "core/fpdfapi/parser/cpdf_document.h"
36 #include "core/fpdfapi/parser/cpdf_name.h"
37 #include "core/fpdfapi/parser/cpdf_number.h"
38 #include "core/fpdfapi/parser/cpdf_reference.h"
39 #include "core/fpdfapi/parser/cpdf_stream.h"
40 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
41 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
42 #include "core/fpdfapi/parser/object_tree_traversal_util.h"
43 #include "core/fxcrt/check.h"
44 #include "core/fxcrt/check_op.h"
45 #include "core/fxcrt/containers/contains.h"
46 #include "core/fxcrt/notreached.h"
47 #include "core/fxcrt/numerics/safe_conversions.h"
48 #include "core/fxcrt/span.h"
49 
50 namespace {
51 
52 // Key: The resource type.
53 // Value: The resource names of a given type.
54 using ResourcesMap = std::map<ByteString, std::set<ByteString>>;
55 
56 // Returns whether it wrote to `buf` or not.
WriteColorToStream(fxcrt::ostringstream & buf,const CPDF_Color * color)57 bool WriteColorToStream(fxcrt::ostringstream& buf, const CPDF_Color* color) {
58   if (!color || (!color->IsColorSpaceRGB() && !color->IsColorSpaceGray())) {
59     return false;
60   }
61 
62   std::optional<FX_RGB_STRUCT<float>> colors = color->GetRGB();
63   if (!colors.has_value()) {
64     return false;
65   }
66 
67   WriteFloat(buf, colors.value().red) << " ";
68   WriteFloat(buf, colors.value().green) << " ";
69   WriteFloat(buf, colors.value().blue);
70   return true;
71 }
72 
RecordPageObjectResourceUsage(const CPDF_PageObject * page_object,ResourcesMap & seen_resources)73 void RecordPageObjectResourceUsage(const CPDF_PageObject* page_object,
74                                    ResourcesMap& seen_resources) {
75   const ByteString& resource_name = page_object->GetResourceName();
76   if (!resource_name.IsEmpty()) {
77     switch (page_object->GetType()) {
78       case CPDF_PageObject::Type::kText:
79         seen_resources["Font"].insert(resource_name);
80         break;
81       case CPDF_PageObject::Type::kImage:
82       case CPDF_PageObject::Type::kForm:
83         seen_resources["XObject"].insert(resource_name);
84         break;
85       case CPDF_PageObject::Type::kPath:
86         break;
87       case CPDF_PageObject::Type::kShading:
88         break;
89     }
90   }
91   for (const auto& name : page_object->GetGraphicsResourceNames()) {
92     CHECK(!name.IsEmpty());
93     seen_resources["ExtGState"].insert(name);
94   }
95 }
96 
RemoveUnusedResources(RetainPtr<CPDF_Dictionary> resources_dict,const ResourcesMap & resources_in_use)97 void RemoveUnusedResources(RetainPtr<CPDF_Dictionary> resources_dict,
98                            const ResourcesMap& resources_in_use) {
99   // TODO(thestig): Remove other unused resource types:
100   // - ColorSpace
101   // - Pattern
102   // - Shading
103   static constexpr const char* kResourceKeys[] = {"ExtGState", "Font",
104                                                   "XObject"};
105   for (const char* resource_key : kResourceKeys) {
106     RetainPtr<CPDF_Dictionary> resource_dict =
107         resources_dict->GetMutableDictFor(resource_key);
108     if (!resource_dict) {
109       continue;
110     }
111 
112     std::vector<ByteString> keys;
113     {
114       CPDF_DictionaryLocker resource_dict_locker(resource_dict);
115       for (auto& it : resource_dict_locker) {
116         keys.push_back(it.first);
117       }
118     }
119 
120     auto it = resources_in_use.find(resource_key);
121     const std::set<ByteString>* resource_in_use_of_current_type =
122         it != resources_in_use.end() ? &it->second : nullptr;
123     for (const ByteString& key : keys) {
124       if (resource_in_use_of_current_type &&
125           pdfium::Contains(*resource_in_use_of_current_type, key)) {
126         continue;
127       }
128 
129       resource_dict->RemoveFor(key.AsStringView());
130     }
131   }
132 }
133 
134 }  // namespace
135 
CPDF_PageContentGenerator(CPDF_PageObjectHolder * pObjHolder)136 CPDF_PageContentGenerator::CPDF_PageContentGenerator(
137     CPDF_PageObjectHolder* pObjHolder)
138     : m_pObjHolder(pObjHolder), m_pDocument(pObjHolder->GetDocument()) {
139   // Copy all page objects, even if they are inactive.
140   for (const auto& pObj : *pObjHolder) {
141     m_pageObjects.emplace_back(pObj.get());
142   }
143 }
144 
145 CPDF_PageContentGenerator::~CPDF_PageContentGenerator() = default;
146 
GenerateContent()147 void CPDF_PageContentGenerator::GenerateContent() {
148   DCHECK(m_pObjHolder->IsPage());
149   std::map<int32_t, fxcrt::ostringstream> new_stream_data =
150       GenerateModifiedStreams();
151   // If no streams were regenerated or removed, nothing to do here.
152   if (new_stream_data.empty()) {
153     return;
154   }
155 
156   UpdateContentStreams(std::move(new_stream_data));
157   UpdateResourcesDict();
158 }
159 
160 std::map<int32_t, fxcrt::ostringstream>
GenerateModifiedStreams()161 CPDF_PageContentGenerator::GenerateModifiedStreams() {
162   // Figure out which streams are dirty.
163   std::set<int32_t> all_dirty_streams;
164   for (auto& pPageObj : m_pageObjects) {
165     // Must include dirty page objects even if they are marked as inactive.
166     // Otherwise an inactive object will not be detected that its stream needs
167     // to be removed as part of regeneration.
168     if (pPageObj->IsDirty())
169       all_dirty_streams.insert(pPageObj->GetContentStream());
170   }
171   std::set<int32_t> marked_dirty_streams = m_pObjHolder->TakeDirtyStreams();
172   all_dirty_streams.insert(marked_dirty_streams.begin(),
173                            marked_dirty_streams.end());
174 
175   // Start regenerating dirty streams.
176   std::map<int32_t, fxcrt::ostringstream> streams;
177   std::set<int32_t> empty_streams;
178   std::unique_ptr<const CPDF_ContentMarks> empty_content_marks =
179       std::make_unique<CPDF_ContentMarks>();
180   std::map<int32_t, const CPDF_ContentMarks*> current_content_marks;
181 
182   for (int32_t dirty_stream : all_dirty_streams) {
183     fxcrt::ostringstream buf;
184 
185     // Set the default graphic state values. Update CTM to be the identity
186     // matrix for the duration of this stream, if it is not already.
187     buf << "q\n";
188     const CFX_Matrix ctm =
189         m_pObjHolder->GetCTMAtBeginningOfStream(dirty_stream);
190     if (!ctm.IsIdentity()) {
191       WriteMatrix(buf, ctm.GetInverse()) << " cm\n";
192     }
193 
194     ProcessDefaultGraphics(&buf);
195     streams[dirty_stream] = std::move(buf);
196     empty_streams.insert(dirty_stream);
197     current_content_marks[dirty_stream] = empty_content_marks.get();
198   }
199 
200   // Process the page objects, write into each dirty stream.
201   for (auto& pPageObj : m_pageObjects) {
202     if (!pPageObj->IsActive()) {
203       continue;
204     }
205 
206     int stream_index = pPageObj->GetContentStream();
207     auto it = streams.find(stream_index);
208     if (it == streams.end())
209       continue;
210 
211     fxcrt::ostringstream* buf = &it->second;
212     empty_streams.erase(stream_index);
213     current_content_marks[stream_index] =
214         ProcessContentMarks(buf, pPageObj, current_content_marks[stream_index]);
215     ProcessPageObject(buf, pPageObj);
216   }
217 
218   // Finish dirty streams.
219   for (int32_t dirty_stream : all_dirty_streams) {
220     CFX_Matrix prev_ctm;
221     CFX_Matrix ctm;
222     bool affects_ctm;
223     if (dirty_stream == 0) {
224       // For the first stream, `prev_ctm` is the identity matrix.
225       ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream);
226       affects_ctm = !ctm.IsIdentity();
227     } else if (dirty_stream > 0) {
228       prev_ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream - 1);
229       ctm = m_pObjHolder->GetCTMAtEndOfStream(dirty_stream);
230       affects_ctm = prev_ctm != ctm;
231     } else {
232       CHECK_EQ(CPDF_PageObject::kNoContentStream, dirty_stream);
233       // This is the last stream, so there is no subsequent stream that it can
234       // affect.
235       affects_ctm = false;
236     }
237 
238     const bool is_empty = pdfium::Contains(empty_streams, dirty_stream);
239 
240     fxcrt::ostringstream* buf = &streams[dirty_stream];
241     if (is_empty && !affects_ctm) {
242       // Clear to show that this stream needs to be deleted.
243       buf->str("");
244       continue;
245     }
246 
247     if (!is_empty) {
248       FinishMarks(buf, current_content_marks[dirty_stream]);
249     }
250 
251     // Return graphics to original state.
252     *buf << "Q\n";
253 
254     if (affects_ctm) {
255       // Update CTM so the next stream gets the expected value.
256       CFX_Matrix ctm_difference = prev_ctm.GetInverse() * ctm;
257       if (!ctm_difference.IsIdentity()) {
258         WriteMatrix(*buf, ctm_difference) << " cm\n";
259       }
260     }
261   }
262 
263   return streams;
264 }
265 
UpdateContentStreams(std::map<int32_t,fxcrt::ostringstream> && new_stream_data)266 void CPDF_PageContentGenerator::UpdateContentStreams(
267     std::map<int32_t, fxcrt::ostringstream>&& new_stream_data) {
268   CHECK(!new_stream_data.empty());
269 
270   // Make sure default graphics are created.
271   m_DefaultGraphicsName = GetOrCreateDefaultGraphics();
272 
273   CPDF_PageContentManager page_content_manager(m_pObjHolder, m_pDocument);
274   for (auto& pair : new_stream_data) {
275     int32_t stream_index = pair.first;
276     fxcrt::ostringstream* buf = &pair.second;
277 
278     if (stream_index == CPDF_PageObject::kNoContentStream) {
279       int new_stream_index =
280           pdfium::checked_cast<int>(page_content_manager.AddStream(buf));
281       UpdateStreamlessPageObjects(new_stream_index);
282       continue;
283     }
284 
285     if (page_content_manager.HasStreamAtIndex(stream_index)) {
286       page_content_manager.UpdateStream(stream_index, buf);
287     } else {
288       page_content_manager.AddStream(buf);
289     }
290   }
291 }
292 
UpdateResourcesDict()293 void CPDF_PageContentGenerator::UpdateResourcesDict() {
294   RetainPtr<CPDF_Dictionary> resources = m_pObjHolder->GetMutableResources();
295   if (!resources) {
296     return;
297   }
298 
299   const uint32_t resources_object_number = resources->GetObjNum();
300   if (resources_object_number) {
301     // If `resources` is not an inline object, then do not modify it directly if
302     // it has multiple references.
303     if (pdfium::Contains(GetObjectsWithMultipleReferences(m_pDocument),
304                          resources_object_number)) {
305       resources = pdfium::WrapRetain(resources->Clone()->AsMutableDictionary());
306       const uint32_t clone_object_number =
307           m_pDocument->AddIndirectObject(resources);
308       m_pObjHolder->SetResources(resources);
309       m_pObjHolder->GetMutableDict()->SetNewFor<CPDF_Reference>(
310           pdfium::page_object::kResources, m_pDocument, clone_object_number);
311     }
312   }
313 
314   ResourcesMap seen_resources;
315   for (auto& page_object : m_pageObjects) {
316     if (!page_object->IsActive()) {
317       continue;
318     }
319     RecordPageObjectResourceUsage(page_object, seen_resources);
320   }
321   if (!m_DefaultGraphicsName.IsEmpty()) {
322     seen_resources["ExtGState"].insert(m_DefaultGraphicsName);
323   }
324 
325   RemoveUnusedResources(std::move(resources), seen_resources);
326 }
327 
// Registers `pResource` in the holder's resources under the `bsType`
// sub-dictionary and returns the name it was registered under.
//
// The resources dictionary is created (as a new indirect object referenced
// from the holder's dictionary) if the holder has none, and the `bsType`
// sub-dictionary is created if missing. The name has the form "FX<c><n>",
// where <c> is the first character of `bsType` and <n> is the smallest
// integer >= 1 that yields a key not already present. The entry is stored as
// a reference to `pResource`'s object number.
ByteString CPDF_PageContentGenerator::RealizeResource(
    const CPDF_Object* pResource,
    const ByteString& bsType) const {
  DCHECK(pResource);

  if (!m_pObjHolder->GetResources()) {
    m_pObjHolder->SetResources(m_pDocument->NewIndirect<CPDF_Dictionary>());
    m_pObjHolder->GetMutableDict()->SetNewFor<CPDF_Reference>(
        pdfium::page_object::kResources, m_pDocument,
        m_pObjHolder->GetResources()->GetObjNum());
  }

  RetainPtr<CPDF_Dictionary> type_dict =
      m_pObjHolder->GetMutableResources()->GetOrCreateDictFor(bsType);

  // Probe FX<c>1, FX<c>2, ... until an unused key is found.
  ByteString name;
  for (int id = 1;; ++id) {
    name = ByteString::Format("FX%c%d", bsType[0], id);
    if (!type_dict->KeyExist(name)) {
      break;
    }
  }

  type_dict->SetNewFor<CPDF_Reference>(name, m_pDocument,
                                       pResource->GetObjNum());
  return name;
}
354 
ProcessPageObjects(fxcrt::ostringstream * buf)355 bool CPDF_PageContentGenerator::ProcessPageObjects(fxcrt::ostringstream* buf) {
356   bool bDirty = false;
357   std::unique_ptr<const CPDF_ContentMarks> empty_content_marks =
358       std::make_unique<CPDF_ContentMarks>();
359   const CPDF_ContentMarks* content_marks = empty_content_marks.get();
360 
361   for (auto& pPageObj : m_pageObjects) {
362     if (m_pObjHolder->IsPage() &&
363         (!pPageObj->IsDirty() || !pPageObj->IsActive())) {
364       continue;
365     }
366 
367     bDirty = true;
368     content_marks = ProcessContentMarks(buf, pPageObj, content_marks);
369     ProcessPageObject(buf, pPageObj);
370   }
371   FinishMarks(buf, content_marks);
372   return bDirty;
373 }
374 
UpdateStreamlessPageObjects(int new_content_stream_index)375 void CPDF_PageContentGenerator::UpdateStreamlessPageObjects(
376     int new_content_stream_index) {
377   for (auto& pPageObj : m_pageObjects) {
378     if (!pPageObj->IsActive()) {
379       continue;
380     }
381 
382     if (pPageObj->GetContentStream() == CPDF_PageObject::kNoContentStream) {
383       pPageObj->SetContentStream(new_content_stream_index);
384     }
385   }
386 }
387 
ProcessContentMarks(fxcrt::ostringstream * buf,const CPDF_PageObject * pPageObj,const CPDF_ContentMarks * pPrev)388 const CPDF_ContentMarks* CPDF_PageContentGenerator::ProcessContentMarks(
389     fxcrt::ostringstream* buf,
390     const CPDF_PageObject* pPageObj,
391     const CPDF_ContentMarks* pPrev) {
392   const CPDF_ContentMarks* pNext = pPageObj->GetContentMarks();
393   const size_t first_different = pPrev->FindFirstDifference(pNext);
394 
395   // Close all marks that are in prev but not in next.
396   // Technically we should iterate backwards to close from the top to the
397   // bottom, but since the EMC operators do not identify which mark they are
398   // closing, it does not matter.
399   for (size_t i = first_different; i < pPrev->CountItems(); ++i)
400     *buf << "EMC\n";
401 
402   // Open all marks that are in next but not in prev.
403   for (size_t i = first_different; i < pNext->CountItems(); ++i) {
404     const CPDF_ContentMarkItem* item = pNext->GetItem(i);
405 
406     // Write mark tag.
407     *buf << "/" << PDF_NameEncode(item->GetName()) << " ";
408 
409     // If there are no parameters, write a BMC (begin marked content) operator.
410     if (item->GetParamType() == CPDF_ContentMarkItem::kNone) {
411       *buf << "BMC\n";
412       continue;
413     }
414 
415     // If there are parameters, write properties, direct or indirect.
416     switch (item->GetParamType()) {
417       case CPDF_ContentMarkItem::kDirectDict: {
418         CPDF_StringArchiveStream archive_stream(buf);
419         item->GetParam()->WriteTo(&archive_stream, nullptr);
420         *buf << " ";
421         break;
422       }
423       case CPDF_ContentMarkItem::kPropertiesDict: {
424         *buf << "/" << item->GetPropertyName() << " ";
425         break;
426       }
427       case CPDF_ContentMarkItem::kNone:
428         NOTREACHED_NORETURN();
429     }
430 
431     // Write BDC (begin dictionary content) operator.
432     *buf << "BDC\n";
433   }
434 
435   return pNext;
436 }
437 
FinishMarks(fxcrt::ostringstream * buf,const CPDF_ContentMarks * pContentMarks)438 void CPDF_PageContentGenerator::FinishMarks(
439     fxcrt::ostringstream* buf,
440     const CPDF_ContentMarks* pContentMarks) {
441   // Technically we should iterate backwards to close from the top to the
442   // bottom, but since the EMC operators do not identify which mark they are
443   // closing, it does not matter.
444   for (size_t i = 0; i < pContentMarks->CountItems(); ++i)
445     *buf << "EMC\n";
446 }
447 
ProcessPageObject(fxcrt::ostringstream * buf,CPDF_PageObject * pPageObj)448 void CPDF_PageContentGenerator::ProcessPageObject(fxcrt::ostringstream* buf,
449                                                   CPDF_PageObject* pPageObj) {
450   if (CPDF_ImageObject* pImageObject = pPageObj->AsImage())
451     ProcessImage(buf, pImageObject);
452   else if (CPDF_FormObject* pFormObj = pPageObj->AsForm())
453     ProcessForm(buf, pFormObj);
454   else if (CPDF_PathObject* pPathObj = pPageObj->AsPath())
455     ProcessPath(buf, pPathObj);
456   else if (CPDF_TextObject* pTextObj = pPageObj->AsText())
457     ProcessText(buf, pTextObj);
458   pPageObj->SetDirty(false);
459 }
460 
ProcessImage(fxcrt::ostringstream * buf,CPDF_ImageObject * pImageObj)461 void CPDF_PageContentGenerator::ProcessImage(fxcrt::ostringstream* buf,
462                                              CPDF_ImageObject* pImageObj) {
463   const CFX_Matrix& matrix = pImageObj->matrix();
464   if ((matrix.a == 0 && matrix.b == 0) || (matrix.c == 0 && matrix.d == 0)) {
465     return;
466   }
467 
468   RetainPtr<CPDF_Image> pImage = pImageObj->GetImage();
469   if (pImage->IsInline())
470     return;
471 
472   RetainPtr<const CPDF_Stream> pStream = pImage->GetStream();
473   if (!pStream)
474     return;
475 
476   *buf << "q ";
477 
478   if (!matrix.IsIdentity()) {
479     WriteMatrix(*buf, matrix) << " cm ";
480   }
481 
482   bool bWasInline = pStream->IsInline();
483   if (bWasInline)
484     pImage->ConvertStreamToIndirectObject();
485 
486   ByteString name = RealizeResource(pStream, "XObject");
487   pImageObj->SetResourceName(name);
488 
489   if (bWasInline) {
490     auto* pPageData = CPDF_DocPageData::FromDocument(m_pDocument);
491     pImageObj->SetImage(pPageData->GetImage(pStream->GetObjNum()));
492   }
493 
494   *buf << "/" << PDF_NameEncode(name) << " Do Q\n";
495 }
496 
ProcessForm(fxcrt::ostringstream * buf,CPDF_FormObject * pFormObj)497 void CPDF_PageContentGenerator::ProcessForm(fxcrt::ostringstream* buf,
498                                             CPDF_FormObject* pFormObj) {
499   const CFX_Matrix& matrix = pFormObj->form_matrix();
500   if ((matrix.a == 0 && matrix.b == 0) || (matrix.c == 0 && matrix.d == 0)) {
501     return;
502   }
503 
504   RetainPtr<const CPDF_Stream> pStream = pFormObj->form()->GetStream();
505   if (!pStream)
506     return;
507 
508   ByteString name = RealizeResource(pStream.Get(), "XObject");
509   pFormObj->SetResourceName(name);
510 
511   *buf << "q\n";
512 
513   if (!matrix.IsIdentity()) {
514     WriteMatrix(*buf, matrix) << " cm ";
515   }
516 
517   *buf << "/" << PDF_NameEncode(name) << " Do Q\n";
518 }
519 
520 // Processing path construction with operators from Table 4.9 of PDF spec 1.7:
521 // "re" appends a rectangle (here, used only if the whole path is a rectangle)
522 // "m" moves current point to the given coordinates
523 // "l" creates a line from current point to the new point
524 // "c" adds a Bezier curve from current to last point, using the two other
525 // points as the Bezier control points
526 // Note: "l", "c" change the current point
527 // "h" closes the subpath (appends a line from current to starting point)
ProcessPathPoints(fxcrt::ostringstream * buf,CPDF_Path * pPath)528 void CPDF_PageContentGenerator::ProcessPathPoints(fxcrt::ostringstream* buf,
529                                                   CPDF_Path* pPath) {
530   pdfium::span<const CFX_Path::Point> points = pPath->GetPoints();
531   if (pPath->IsRect()) {
532     CFX_PointF diff = points[2].m_Point - points[0].m_Point;
533     WritePoint(*buf, points[0].m_Point) << " ";
534     WritePoint(*buf, diff) << " re";
535     return;
536   }
537   for (size_t i = 0; i < points.size(); ++i) {
538     if (i > 0)
539       *buf << " ";
540 
541     WritePoint(*buf, points[i].m_Point);
542 
543     CFX_Path::Point::Type point_type = points[i].m_Type;
544     if (point_type == CFX_Path::Point::Type::kMove) {
545       *buf << " m";
546     } else if (point_type == CFX_Path::Point::Type::kLine) {
547       *buf << " l";
548     } else if (point_type == CFX_Path::Point::Type::kBezier) {
549       if (i + 2 >= points.size() ||
550           !points[i].IsTypeAndOpen(CFX_Path::Point::Type::kBezier) ||
551           !points[i + 1].IsTypeAndOpen(CFX_Path::Point::Type::kBezier) ||
552           points[i + 2].m_Type != CFX_Path::Point::Type::kBezier) {
553         // If format is not supported, close the path and paint
554         *buf << " h";
555         break;
556       }
557       *buf << " ";
558       WritePoint(*buf, points[i + 1].m_Point) << " ";
559       WritePoint(*buf, points[i + 2].m_Point) << " c";
560       i += 2;
561     }
562     if (points[i].m_CloseFigure)
563       *buf << " h";
564   }
565 }
566 
567 // Processing path painting with operators from Table 4.10 of PDF spec 1.7:
568 // Path painting operators: "S", "n", "B", "f", "B*", "f*", depending on
569 // the filling mode and whether we want stroking the path or not.
570 // "Q" restores the graphics state imposed by the ProcessGraphics method.
ProcessPath(fxcrt::ostringstream * buf,CPDF_PathObject * pPathObj)571 void CPDF_PageContentGenerator::ProcessPath(fxcrt::ostringstream* buf,
572                                             CPDF_PathObject* pPathObj) {
573   ProcessGraphics(buf, pPathObj);
574 
575   const CFX_Matrix& matrix = pPathObj->matrix();
576   if (!matrix.IsIdentity()) {
577     WriteMatrix(*buf, matrix) << " cm ";
578   }
579 
580   ProcessPathPoints(buf, &pPathObj->path());
581 
582   if (pPathObj->has_no_filltype())
583     *buf << (pPathObj->stroke() ? " S" : " n");
584   else if (pPathObj->has_winding_filltype())
585     *buf << (pPathObj->stroke() ? " B" : " f");
586   else if (pPathObj->has_alternate_filltype())
587     *buf << (pPathObj->stroke() ? " B*" : " f*");
588   *buf << " Q\n";
589 }
590 
591 // This method supports color operators rg and RGB from Table 4.24 of PDF spec
592 // 1.7. A color will not be set if the colorspace is not DefaultRGB or the RGB
593 // values cannot be obtained. The method also adds an external graphics
594 // dictionary, as described in Section 4.3.4.
595 // "rg" sets the fill color, "RG" sets the stroke color (using DefaultRGB)
596 // "w" sets the stroke line width.
597 // "ca" sets the fill alpha, "CA" sets the stroke alpha.
598 // "W" and "W*" modify the clipping path using the nonzero winding rule and
599 // even-odd rules, respectively.
600 // "q" saves the graphics state, so that the settings can later be reversed
ProcessGraphics(fxcrt::ostringstream * buf,CPDF_PageObject * pPageObj)601 void CPDF_PageContentGenerator::ProcessGraphics(fxcrt::ostringstream* buf,
602                                                 CPDF_PageObject* pPageObj) {
603   *buf << "q ";
604   if (WriteColorToStream(*buf, pPageObj->color_state().GetFillColor())) {
605     *buf << " rg ";
606   }
607   if (WriteColorToStream(*buf, pPageObj->color_state().GetStrokeColor())) {
608     *buf << " RG ";
609   }
610   float line_width = pPageObj->graph_state().GetLineWidth();
611   if (line_width != 1.0f) {
612     WriteFloat(*buf, line_width) << " w ";
613   }
614   CFX_GraphStateData::LineCap lineCap = pPageObj->graph_state().GetLineCap();
615   if (lineCap != CFX_GraphStateData::LineCap::kButt)
616     *buf << static_cast<int>(lineCap) << " J ";
617   CFX_GraphStateData::LineJoin lineJoin = pPageObj->graph_state().GetLineJoin();
618   if (lineJoin != CFX_GraphStateData::LineJoin::kMiter)
619     *buf << static_cast<int>(lineJoin) << " j ";
620   std::vector<float> dash_array = pPageObj->graph_state().GetLineDashArray();
621   if (dash_array.size()) {
622     *buf << "[";
623     for (size_t i = 0; i < dash_array.size(); ++i) {
624       if (i > 0) {
625         *buf << " ";
626       }
627       WriteFloat(*buf, dash_array[i]);
628     }
629     *buf << "] ";
630     WriteFloat(*buf, pPageObj->graph_state().GetLineDashPhase()) << " d ";
631   }
632 
633   const CPDF_ClipPath& clip_path = pPageObj->clip_path();
634   if (clip_path.HasRef()) {
635     for (size_t i = 0; i < clip_path.GetPathCount(); ++i) {
636       CPDF_Path path = clip_path.GetPath(i);
637       ProcessPathPoints(buf, &path);
638       switch (clip_path.GetClipType(i)) {
639         case CFX_FillRenderOptions::FillType::kWinding:
640           *buf << " W ";
641           break;
642         case CFX_FillRenderOptions::FillType::kEvenOdd:
643           *buf << " W* ";
644           break;
645         case CFX_FillRenderOptions::FillType::kNoFill:
646           NOTREACHED_NORETURN();
647       }
648 
649       // Use a no-op path-painting operator to terminate the path without
650       // causing any marks to be placed on the page.
651       *buf << "n ";
652     }
653   }
654 
655   GraphicsData graphD;
656   graphD.fillAlpha = pPageObj->general_state().GetFillAlpha();
657   graphD.strokeAlpha = pPageObj->general_state().GetStrokeAlpha();
658   graphD.blendType = pPageObj->general_state().GetBlendType();
659   if (graphD.fillAlpha == 1.0f && graphD.strokeAlpha == 1.0f &&
660       graphD.blendType == BlendMode::kNormal) {
661     return;
662   }
663 
664   ByteString name;
665   std::optional<ByteString> maybe_name =
666       m_pObjHolder->GraphicsMapSearch(graphD);
667   if (maybe_name.has_value()) {
668     name = std::move(maybe_name.value());
669   } else {
670     auto gsDict = pdfium::MakeRetain<CPDF_Dictionary>();
671     if (graphD.fillAlpha != 1.0f)
672       gsDict->SetNewFor<CPDF_Number>("ca", graphD.fillAlpha);
673 
674     if (graphD.strokeAlpha != 1.0f)
675       gsDict->SetNewFor<CPDF_Number>("CA", graphD.strokeAlpha);
676 
677     if (graphD.blendType != BlendMode::kNormal) {
678       gsDict->SetNewFor<CPDF_Name>("BM",
679                                    pPageObj->general_state().GetBlendMode());
680     }
681     m_pDocument->AddIndirectObject(gsDict);
682     name = RealizeResource(std::move(gsDict), "ExtGState");
683     pPageObj->mutable_general_state().SetGraphicsResourceNames({name});
684     m_pObjHolder->GraphicsMapInsert(graphD, name);
685   }
686   *buf << "/" << PDF_NameEncode(name) << " gs ";
687 }
688 
ProcessDefaultGraphics(fxcrt::ostringstream * buf)689 void CPDF_PageContentGenerator::ProcessDefaultGraphics(
690     fxcrt::ostringstream* buf) {
691   *buf << "0 0 0 RG 0 0 0 rg 1 w "
692        << static_cast<int>(CFX_GraphStateData::LineCap::kButt) << " J "
693        << static_cast<int>(CFX_GraphStateData::LineJoin::kMiter) << " j\n";
694   m_DefaultGraphicsName = GetOrCreateDefaultGraphics();
695   *buf << "/" << PDF_NameEncode(m_DefaultGraphicsName) << " gs ";
696 }
697 
GetOrCreateDefaultGraphics() const698 ByteString CPDF_PageContentGenerator::GetOrCreateDefaultGraphics() const {
699   GraphicsData defaultGraphics;
700   defaultGraphics.fillAlpha = 1.0f;
701   defaultGraphics.strokeAlpha = 1.0f;
702   defaultGraphics.blendType = BlendMode::kNormal;
703 
704   std::optional<ByteString> maybe_name =
705       m_pObjHolder->GraphicsMapSearch(defaultGraphics);
706   if (maybe_name.has_value())
707     return maybe_name.value();
708 
709   auto gsDict = pdfium::MakeRetain<CPDF_Dictionary>();
710   gsDict->SetNewFor<CPDF_Number>("ca", defaultGraphics.fillAlpha);
711   gsDict->SetNewFor<CPDF_Number>("CA", defaultGraphics.strokeAlpha);
712   gsDict->SetNewFor<CPDF_Name>("BM", "Normal");
713   m_pDocument->AddIndirectObject(gsDict);
714   ByteString name = RealizeResource(std::move(gsDict), "ExtGState");
715   m_pObjHolder->GraphicsMapInsert(defaultGraphics, name);
716   return name;
717 }
718 
719 // This method adds text to the buffer, BT begins the text object, ET ends it.
720 // Tm sets the text matrix (allows positioning and transforming text).
721 // Tf sets the font name (from Font in Resources) and font size.
722 // Tr sets the text rendering mode.
723 // Tj sets the actual text, <####...> is used when specifying charcodes.
ProcessText(fxcrt::ostringstream * buf,CPDF_TextObject * pTextObj)724 void CPDF_PageContentGenerator::ProcessText(fxcrt::ostringstream* buf,
725                                             CPDF_TextObject* pTextObj) {
726   ProcessGraphics(buf, pTextObj);
727   *buf << "BT ";
728 
729   const CFX_Matrix& matrix = pTextObj->GetTextMatrix();
730   if (!matrix.IsIdentity()) {
731     WriteMatrix(*buf, matrix) << " Tm ";
732   }
733 
734   RetainPtr<CPDF_Font> pFont(pTextObj->GetFont());
735   if (!pFont)
736     pFont = CPDF_Font::GetStockFont(m_pDocument, "Helvetica");
737 
738   FontData data;
739   const CPDF_FontEncoding* pEncoding = nullptr;
740   if (pFont->IsType1Font()) {
741     data.type = "Type1";
742     pEncoding = pFont->AsType1Font()->GetEncoding();
743   } else if (pFont->IsTrueTypeFont()) {
744     data.type = "TrueType";
745     pEncoding = pFont->AsTrueTypeFont()->GetEncoding();
746   } else if (pFont->IsCIDFont()) {
747     data.type = "Type0";
748   } else {
749     return;
750   }
751   data.baseFont = pFont->GetBaseFontName();
752 
753   ByteString dict_name;
754   std::optional<ByteString> maybe_name = m_pObjHolder->FontsMapSearch(data);
755   if (maybe_name.has_value()) {
756     dict_name = std::move(maybe_name.value());
757   } else {
758     RetainPtr<const CPDF_Object> pIndirectFont = pFont->GetFontDict();
759     if (pIndirectFont->IsInline()) {
760       // In this case we assume it must be a standard font
761       auto pFontDict = pdfium::MakeRetain<CPDF_Dictionary>();
762       pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
763       pFontDict->SetNewFor<CPDF_Name>("Subtype", data.type);
764       pFontDict->SetNewFor<CPDF_Name>("BaseFont", data.baseFont);
765       if (pEncoding) {
766         pFontDict->SetFor("Encoding",
767                           pEncoding->Realize(m_pDocument->GetByteStringPool()));
768       }
769       m_pDocument->AddIndirectObject(pFontDict);
770       pIndirectFont = std::move(pFontDict);
771     }
772     dict_name = RealizeResource(std::move(pIndirectFont), "Font");
773     m_pObjHolder->FontsMapInsert(data, dict_name);
774   }
775   pTextObj->SetResourceName(dict_name);
776 
777   *buf << "/" << PDF_NameEncode(dict_name) << " ";
778   WriteFloat(*buf, pTextObj->GetFontSize()) << " Tf ";
779   *buf << static_cast<int>(pTextObj->GetTextRenderMode()) << " Tr ";
780   ByteString text;
781   for (uint32_t charcode : pTextObj->GetCharCodes()) {
782     if (charcode != CPDF_Font::kInvalidCharCode) {
783       pFont->AppendChar(&text, charcode);
784     }
785   }
786   *buf << PDF_HexEncodeString(text.AsStringView()) << " Tj ET";
787   *buf << " Q\n";
788 }
789