• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "testing/helpers/write.h"
6 
7 #include <limits.h>
8 
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fxcrt/check.h"
15 #include "core/fxcrt/notreached.h"
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_annot.h"
18 #include "public/fpdf_attachment.h"
19 #include "public/fpdf_edit.h"
20 #include "public/fpdf_thumbnail.h"
21 #include "testing/fx_string_testhelpers.h"
22 #include "testing/image_diff/image_diff_png.h"
23 
24 #ifdef PDF_ENABLE_SKIA
25 #include "third_party/skia/include/core/SkPicture.h"       // nogncheck
26 #include "third_party/skia/include/core/SkSerialProcs.h"   // nogncheck
27 #include "third_party/skia/include/core/SkStream.h"        // nogncheck
28 #include "third_party/skia/include/encode/SkPngEncoder.h"  // nogncheck
29 #endif
30 
31 namespace {
32 
// Returns true when |stride|, |width| and |height| are all non-negative and
// |stride| * |height| is representable as an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool has_negative = stride < 0 || width < 0 || height < 0;
  if (has_negative) {
    return false;
  }
  // A zero height can never overflow. Otherwise compare against the quotient
  // so that the multiplication itself is never performed.
  return height == 0 || stride <= INT_MAX / height;
}
42 
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)43 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
44   if (subtype == FPDF_ANNOT_TEXT) {
45     return "Text";
46   }
47   if (subtype == FPDF_ANNOT_LINK) {
48     return "Link";
49   }
50   if (subtype == FPDF_ANNOT_FREETEXT) {
51     return "FreeText";
52   }
53   if (subtype == FPDF_ANNOT_LINE) {
54     return "Line";
55   }
56   if (subtype == FPDF_ANNOT_SQUARE) {
57     return "Square";
58   }
59   if (subtype == FPDF_ANNOT_CIRCLE) {
60     return "Circle";
61   }
62   if (subtype == FPDF_ANNOT_POLYGON) {
63     return "Polygon";
64   }
65   if (subtype == FPDF_ANNOT_POLYLINE) {
66     return "PolyLine";
67   }
68   if (subtype == FPDF_ANNOT_HIGHLIGHT) {
69     return "Highlight";
70   }
71   if (subtype == FPDF_ANNOT_UNDERLINE) {
72     return "Underline";
73   }
74   if (subtype == FPDF_ANNOT_SQUIGGLY) {
75     return "Squiggly";
76   }
77   if (subtype == FPDF_ANNOT_STRIKEOUT) {
78     return "StrikeOut";
79   }
80   if (subtype == FPDF_ANNOT_STAMP) {
81     return "Stamp";
82   }
83   if (subtype == FPDF_ANNOT_CARET) {
84     return "Caret";
85   }
86   if (subtype == FPDF_ANNOT_INK) {
87     return "Ink";
88   }
89   if (subtype == FPDF_ANNOT_POPUP) {
90     return "Popup";
91   }
92   if (subtype == FPDF_ANNOT_FILEATTACHMENT) {
93     return "FileAttachment";
94   }
95   if (subtype == FPDF_ANNOT_SOUND) {
96     return "Sound";
97   }
98   if (subtype == FPDF_ANNOT_MOVIE) {
99     return "Movie";
100   }
101   if (subtype == FPDF_ANNOT_WIDGET) {
102     return "Widget";
103   }
104   if (subtype == FPDF_ANNOT_SCREEN) {
105     return "Screen";
106   }
107   if (subtype == FPDF_ANNOT_PRINTERMARK) {
108     return "PrinterMark";
109   }
110   if (subtype == FPDF_ANNOT_TRAPNET) {
111     return "TrapNet";
112   }
113   if (subtype == FPDF_ANNOT_WATERMARK) {
114     return "Watermark";
115   }
116   if (subtype == FPDF_ANNOT_THREED) {
117     return "3D";
118   }
119   if (subtype == FPDF_ANNOT_RICHMEDIA) {
120     return "RichMedia";
121   }
122   if (subtype == FPDF_ANNOT_XFAWIDGET) {
123     return "XFAWidget";
124   }
125   NOTREACHED_NORETURN();
126 }
127 
// Appends |flag| to |*output|, inserting ", " first when |*output| already
// holds earlier entries.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
134 
AnnotFlagsToString(int flags)135 std::string AnnotFlagsToString(int flags) {
136   std::string str;
137   if (flags & FPDF_ANNOT_FLAG_INVISIBLE) {
138     AppendFlagString("Invisible", &str);
139   }
140   if (flags & FPDF_ANNOT_FLAG_HIDDEN) {
141     AppendFlagString("Hidden", &str);
142   }
143   if (flags & FPDF_ANNOT_FLAG_PRINT) {
144     AppendFlagString("Print", &str);
145   }
146   if (flags & FPDF_ANNOT_FLAG_NOZOOM) {
147     AppendFlagString("NoZoom", &str);
148   }
149   if (flags & FPDF_ANNOT_FLAG_NOROTATE) {
150     AppendFlagString("NoRotate", &str);
151   }
152   if (flags & FPDF_ANNOT_FLAG_NOVIEW) {
153     AppendFlagString("NoView", &str);
154   }
155   if (flags & FPDF_ANNOT_FLAG_READONLY) {
156     AppendFlagString("ReadOnly", &str);
157   }
158   if (flags & FPDF_ANNOT_FLAG_LOCKED) {
159     AppendFlagString("Locked", &str);
160   }
161   if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW) {
162     AppendFlagString("ToggleNoView", &str);
163   }
164   return str;
165 }
166 
PageObjectTypeToCString(int type)167 const char* PageObjectTypeToCString(int type) {
168   if (type == FPDF_PAGEOBJ_TEXT) {
169     return "Text";
170   }
171   if (type == FPDF_PAGEOBJ_PATH) {
172     return "Path";
173   }
174   if (type == FPDF_PAGEOBJ_IMAGE) {
175     return "Image";
176   }
177   if (type == FPDF_PAGEOBJ_SHADING) {
178     return "Shading";
179   }
180   if (type == FPDF_PAGEOBJ_FORM) {
181     return "Form";
182   }
183   NOTREACHED_NORETURN();
184 }
185 
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)186 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
187                                int width,
188                                int height,
189                                int stride,
190                                int format) {
191   std::vector<uint8_t> png;
192   switch (format) {
193     case FPDFBitmap_Unknown:
194       break;
195     case FPDFBitmap_Gray:
196       png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
197       break;
198     case FPDFBitmap_BGR:
199       png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
200       break;
201     case FPDFBitmap_BGRx:
202       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
203                                           /*discard_transparency=*/true);
204       break;
205     case FPDFBitmap_BGRA:
206       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
207                                           /*discard_transparency=*/false);
208       break;
209     default:
210       NOTREACHED_NORETURN();
211   }
212   return png;
213 }
214 
215 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)216 int CALLBACK EnhMetaFileProc(HDC hdc,
217                              HANDLETABLE* handle_table,
218                              const ENHMETARECORD* record,
219                              int objects_count,
220                              LPARAM param) {
221   std::vector<const ENHMETARECORD*>& items =
222       *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
223   items.push_back(record);
224   return 1;
225 }
226 #endif  // _WIN32
227 
// Builds "<pdf_name>.<page_num>.<extension>". If the result is 256 characters
// or longer, logs to stderr and returns an empty string instead.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  static constexpr size_t kFilenameLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
241 
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". If the result is
// 256 characters or longer, logs to stderr and returns an empty string
// instead.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  static constexpr size_t kFilenameLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << image_num << "."
          << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
257 
258 }  // namespace
259 
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)260 std::string WritePpm(const char* pdf_name,
261                      int num,
262                      void* buffer_void,
263                      int stride,
264                      int width,
265                      int height) {
266   if (!CheckDimensions(stride, width, height)) {
267     return "";
268   }
269 
270   int out_len = width * height;
271   if (out_len > INT_MAX / 3) {
272     return "";
273   }
274 
275   out_len *= 3;
276 
277   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
278   if (filename.empty()) {
279     return std::string();
280   }
281   FILE* fp = fopen(filename.c_str(), "wb");
282   if (!fp) {
283     return std::string();
284   }
285 
286   fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
287   // Source data is B, G, R, unused.
288   // Dest data is R, G, B.
289   const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
290   std::vector<uint8_t> result(out_len);
291   for (int h = 0; h < height; ++h) {
292     const uint8_t* src_line = buffer + (stride * h);
293     uint8_t* dest_line = result.data() + (width * h * 3);
294     for (int w = 0; w < width; ++w) {
295       // R
296       dest_line[w * 3] = src_line[(w * 4) + 2];
297       // G
298       dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
299       // B
300       dest_line[(w * 3) + 2] = src_line[w * 4];
301     }
302   }
303   if (fwrite(result.data(), out_len, 1, fp) != 1) {
304     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
305   }
306 
307   fclose(fp);
308   return filename;
309 }
310 
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)311 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
312   std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
313   if (filename.empty()) {
314     return;
315   }
316   FILE* fp = fopen(filename.c_str(), "w");
317   if (!fp) {
318     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
319     return;
320   }
321 
322   // Output in UTF32-LE.
323   uint32_t bom = 0x0000FEFF;
324   if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
325     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
326     (void)fclose(fp);
327     return;
328   }
329 
330   for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
331     uint32_t c = FPDFText_GetUnicode(textpage, i);
332     if (fwrite(&c, sizeof(c), 1, fp) != 1) {
333       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
334       break;
335     }
336   }
337   (void)fclose(fp);
338 }
339 
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)340 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
341   // Open the output text file.
342   std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
343   if (filename.empty()) {
344     return;
345   }
346   FILE* fp = fopen(filename.c_str(), "w");
347   if (!fp) {
348     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
349     return;
350   }
351 
352   int annot_count = FPDFPage_GetAnnotCount(page);
353   fprintf(fp, "Number of annotations: %d\n\n", annot_count);
354 
355   // Iterate through all annotations on this page.
356   for (int i = 0; i < annot_count; ++i) {
357     // Retrieve the annotation object and its subtype.
358     fprintf(fp, "Annotation #%d:\n", i + 1);
359     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
360     if (!annot) {
361       fprintf(fp, "Failed to retrieve annotation!\n\n");
362       continue;
363     }
364 
365     FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
366     fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
367 
368     // Retrieve the annotation flags.
369     fprintf(fp, "Flags set: %s\n",
370             AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
371 
372     // Retrieve the annotation's object count and object types.
373     const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
374     fprintf(fp, "Number of objects: %d\n", obj_count);
375     if (obj_count > 0) {
376       fprintf(fp, "Object types: ");
377       for (int j = 0; j < obj_count; ++j) {
378         const char* type = PageObjectTypeToCString(
379             FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
380         fprintf(fp, "%s  ", type);
381       }
382       fprintf(fp, "\n");
383     }
384 
385     // Retrieve the annotation's color and interior color.
386     unsigned int R;
387     unsigned int G;
388     unsigned int B;
389     unsigned int A;
390     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
391                            &A)) {
392       fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
393     } else {
394       fprintf(fp, "Failed to retrieve color.\n");
395     }
396     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
397                            &G, &B, &A)) {
398       fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
399     } else {
400       fprintf(fp, "Failed to retrieve interior color.\n");
401     }
402 
403     // Retrieve the annotation's contents and author.
404     static constexpr char kContentsKey[] = "Contents";
405     static constexpr char kAuthorKey[] = "T";
406     unsigned long length_bytes =
407         FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
408     std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
409     FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
410                              length_bytes);
411     fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
412     length_bytes =
413         FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
414     buf = GetFPDFWideStringBuffer(length_bytes);
415     FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
416     fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
417 
418     // Retrieve the annotation's quadpoints if it is a markup annotation.
419     if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
420       size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
421       fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
422 
423       // Iterate through all quadpoints of the current annotation
424       for (size_t j = 0; j < qp_count; ++j) {
425         FS_QUADPOINTSF quadpoints;
426         if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
427           fprintf(fp,
428                   "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
429                   "(%.3f, %.3f), (%.3f, %.3f)\n",
430                   j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
431                   quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
432                   quadpoints.y4);
433         } else {
434           fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
435         }
436       }
437     }
438 
439     // Retrieve the annotation's rectangle coordinates.
440     FS_RECTF rect;
441     if (FPDFAnnot_GetRect(annot.get(), &rect)) {
442       fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
443               rect.left, rect.bottom, rect.right, rect.top);
444     } else {
445       fprintf(fp, "Failed to retrieve annotation rectangle.\n");
446     }
447   }
448 
449   (void)fclose(fp);
450 }
451 
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)452 std::string WritePng(const char* pdf_name,
453                      int num,
454                      void* buffer,
455                      int stride,
456                      int width,
457                      int height) {
458   if (!CheckDimensions(stride, width, height)) {
459     return "";
460   }
461 
462   auto input = pdfium::make_span(static_cast<uint8_t*>(buffer),
463                                  static_cast<size_t>(stride) * height);
464   std::vector<uint8_t> png_encoding =
465       EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
466   if (png_encoding.empty()) {
467     fprintf(stderr, "Failed to convert bitmap to PNG\n");
468     return "";
469   }
470 
471   std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
472   if (filename.empty()) {
473     return std::string();
474   }
475   FILE* fp = fopen(filename.c_str(), "wb");
476   if (!fp) {
477     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
478     return std::string();
479   }
480 
481   size_t bytes_written =
482       fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
483   if (bytes_written != png_encoding.size()) {
484     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
485   }
486 
487   (void)fclose(fp);
488   return filename;
489 }
490 
491 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)492 std::string WriteBmp(const char* pdf_name,
493                      int num,
494                      void* buffer,
495                      int stride,
496                      int width,
497                      int height) {
498   if (!CheckDimensions(stride, width, height)) {
499     return std::string();
500   }
501 
502   int out_len = stride * height;
503   if (out_len > INT_MAX / 3) {
504     return std::string();
505   }
506 
507   std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
508   if (filename.empty()) {
509     return std::string();
510   }
511   FILE* fp = fopen(filename.c_str(), "wb");
512   if (!fp) {
513     return std::string();
514   }
515 
516   BITMAPINFO bmi = {};
517   bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
518   bmi.bmiHeader.biWidth = width;
519   bmi.bmiHeader.biHeight = -height;  // top-down image
520   bmi.bmiHeader.biPlanes = 1;
521   bmi.bmiHeader.biBitCount = 32;
522   bmi.bmiHeader.biCompression = BI_RGB;
523   bmi.bmiHeader.biSizeImage = 0;
524 
525   BITMAPFILEHEADER file_header = {};
526   file_header.bfType = 0x4d42;
527   file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
528   file_header.bfOffBits = file_header.bfSize - out_len;
529 
530   if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
531       fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
532       fwrite(buffer, out_len, 1, fp) != 1) {
533     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
534   }
535   fclose(fp);
536   return filename;
537 }
538 
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)539 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
540   std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
541   if (filename.empty()) {
542     return;
543   }
544 
545   HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
546 
547   int width = static_cast<int>(FPDF_GetPageWidthF(page));
548   int height = static_cast<int>(FPDF_GetPageHeightF(page));
549   HRGN rgn = CreateRectRgn(0, 0, width, height);
550   SelectClipRgn(dc, rgn);
551   DeleteObject(rgn);
552 
553   SelectObject(dc, GetStockObject(NULL_PEN));
554   SelectObject(dc, GetStockObject(WHITE_BRUSH));
555   // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
556   Rectangle(dc, 0, 0, width + 1, height + 1);
557 
558   CHECK(FPDF_RenderPage(dc, page, 0, 0, width, height, 0,
559                         FPDF_ANNOT | FPDF_PRINTING));
560 
561   DeleteEnhMetaFile(CloseEnhMetaFile(dc));
562 }
563 
WritePS(FPDF_PAGE page,const char * pdf_name,int num)564 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
565   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
566   if (filename.empty()) {
567     return;
568   }
569   FILE* fp = fopen(filename.c_str(), "wb");
570   if (!fp) {
571     return;
572   }
573 
574   HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
575 
576   int width = static_cast<int>(FPDF_GetPageWidthF(page));
577   int height = static_cast<int>(FPDF_GetPageHeightF(page));
578   CHECK(FPDF_RenderPage(dc, page, 0, 0, width, height, 0,
579                         FPDF_ANNOT | FPDF_PRINTING));
580 
581   HENHMETAFILE emf = CloseEnhMetaFile(dc);
582   std::vector<const ENHMETARECORD*> items;
583   EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
584   for (const ENHMETARECORD* record : items) {
585     if (record->iType != EMR_GDICOMMENT) {
586       continue;
587     }
588 
589     const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
590     const char* data = reinterpret_cast<const char*>(comment->Data);
591     uint16_t size = *reinterpret_cast<const uint16_t*>(data);
592     if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
593       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
594       break;
595     }
596   }
597   fclose(fp);
598   DeleteEnhMetaFile(emf);
599 }
600 #endif  // _WIN32
601 
602 #ifdef PDF_ENABLE_SKIA
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension)603 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
604                                             int num,
605                                             const std::string& extension) {
606   std::string discarded_filename;
607   return WriteToSkWStream(pdf_name, num, extension, discarded_filename);
608 }
609 
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension,std::string & filename)610 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
611                                             int num,
612                                             const std::string& extension,
613                                             std::string& filename) {
614   filename =
615       GeneratePageOutputFilename(pdf_name.c_str(), num, extension.c_str());
616   if (filename.empty()) {
617     return nullptr;
618   }
619 
620   auto stream = std::make_unique<SkFILEWStream>(filename.c_str());
621   if (!stream->isValid()) {
622     return nullptr;
623   }
624 
625   return stream;
626 }
627 
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)628 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
629   std::string filename;
630   std::unique_ptr<SkWStream> stream =
631       WriteToSkWStream(pdf_name, num, "skp", filename);
632   if (!stream) {
633     return "";
634   }
635   SkSerialProcs procs;
636   procs.fImageProc = [](SkImage* img, void*) -> sk_sp<SkData> {
637       return SkPngEncoder::Encode(nullptr, img, SkPngEncoder::Options{});
638   };
639 
640   picture.serialize(stream.get(), &procs);
641   return filename;
642 }
643 #endif  // PDF_ENABLE_SKIA
644 
// Selects which representation of a page thumbnail is written to disk.
enum class ThumbnailDecodeType { kBitmap, kRawStream, kDecodedStream };

// Formats the thumbnail output file name for |pdf_name| and |page_num| into
// |name_buf|, which holds |name_buf_size| bytes. The pattern depends on
// |decode_type|:
//   kBitmap:        "<pdf_name>.thumbnail.<page_num>.png"
//   kDecodedStream: "<pdf_name>.thumbnail.decoded.<page_num>.bin"
//   kRawStream:     "<pdf_name>.thumbnail.raw.<page_num>.bin"
// Returns true on success. Returns false, after logging to stderr, when the
// decode type is unknown, formatting fails, or the name does not fit.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  // There is deliberately no `default:` label, so the compiler can flag
  // enumerators added later. |format| starts out null so that an
  // out-of-range value is detected instead of reading an uninitialized
  // pointer.
  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format) {
    fprintf(stderr, "Unknown thumbnail decode type.\n");
    return false;
  }

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0) {
    // After a formatting error the contents of |name_buf| cannot be trusted,
    // so report the input name instead.
    fprintf(stderr, "Failed to format thumbnail filename for %s.\n", pdf_name);
    return false;
  }
  if (static_cast<size_t>(chars_formatted) >= name_buf_size) {
    // With a zero-sized buffer nothing was written to |name_buf|; otherwise
    // it holds the truncated, null-terminated name.
    fprintf(stderr, "Filename %s for saving is too long.\n",
            name_buf_size > 0 ? name_buf : pdf_name);
    return false;
  }

  return true;
}
675 
// Writes |buflen| bytes starting at |buf| to the file |filename|, replacing
// any existing contents. |filetype| is only used in the status messages,
// which are printed to stderr for both success and failure.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    // Terminated with a newline like every other message in this file.
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  size_t bytes_written = fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen) {
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  } else {
    fprintf(stderr, "Failed to write to %s.\n", filename);
  }
  fclose(fp);
}
694 
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)695 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
696   std::vector<uint8_t> png_encoding;
697   int format = FPDFBitmap_GetFormat(bitmap.get());
698   if (format == FPDFBitmap_Unknown) {
699     return png_encoding;
700   }
701 
702   int width = FPDFBitmap_GetWidth(bitmap.get());
703   int height = FPDFBitmap_GetHeight(bitmap.get());
704   int stride = FPDFBitmap_GetStride(bitmap.get());
705   if (!CheckDimensions(stride, width, height)) {
706     return png_encoding;
707   }
708 
709   auto input = pdfium::make_span(
710       static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
711       static_cast<size_t>(stride) * height);
712 
713   png_encoding = EncodePng(input, width, height, stride, format);
714   return png_encoding;
715 }
716 
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)717 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
718   for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
719     FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
720 
721     // Retrieve the attachment file name.
722     std::string attachment_name;
723     unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
724     if (length_bytes) {
725       std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
726       unsigned long actual_length_bytes =
727           FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
728       if (actual_length_bytes == length_bytes) {
729         attachment_name = GetPlatformString(buf.data());
730       }
731     }
732     if (attachment_name.empty()) {
733       fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
734       continue;
735     }
736 
737     // Calculate the full attachment file name.
738     char save_name[256];
739     int chars_formatted =
740         snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
741                  attachment_name.c_str());
742     if (chars_formatted < 0 ||
743         static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
744       fprintf(stderr, "Filename %s is too long.\n", save_name);
745       continue;
746     }
747 
748     // Retrieve the attachment.
749     if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
750       fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
751               attachment_name.c_str());
752       continue;
753     }
754 
755     std::vector<char> data_buf(length_bytes);
756     if (length_bytes) {
757       unsigned long actual_length_bytes;
758       if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
759                                   &actual_length_bytes)) {
760         fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
761                 attachment_name.c_str());
762         continue;
763       }
764     }
765 
766     // Write the attachment file. Since a PDF document could have 0-byte files
767     // as attachments, we should allow saving the 0-byte attachments to files.
768     WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
769   }
770 }
771 
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)772 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
773   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
774     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
775     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
776       continue;
777     }
778 
779     ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
780     if (!bitmap) {
781       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
782               i + 1, page_num + 1);
783       continue;
784     }
785 
786     std::string filename =
787         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
788     if (filename.empty()) {
789       continue;
790     }
791 
792     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
793     if (png_encoding.empty()) {
794       fprintf(stderr,
795               "Failed to convert image object #%d, on page #%d to png.\n",
796               i + 1, page_num + 1);
797       continue;
798     }
799 
800     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
801                       filename.c_str(), "image");
802   }
803 }
804 
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)805 void WriteRenderedImages(FPDF_DOCUMENT doc,
806                          FPDF_PAGE page,
807                          const char* pdf_name,
808                          int page_num) {
809   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
810     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
811     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
812       continue;
813     }
814 
815     ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
816     if (!bitmap) {
817       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
818               i + 1, page_num + 1);
819       continue;
820     }
821 
822     std::string filename =
823         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
824     if (filename.empty()) {
825       continue;
826     }
827 
828     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
829     if (png_encoding.empty()) {
830       fprintf(stderr,
831               "Failed to convert image object #%d, on page #%d to png.\n",
832               i + 1, page_num + 1);
833       continue;
834     }
835 
836     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
837                       filename.c_str(), "image");
838   }
839 }
840 
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)841 void WriteDecodedThumbnailStream(FPDF_PAGE page,
842                                  const char* pdf_name,
843                                  int page_num) {
844   char filename[256];
845   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
846                             ThumbnailDecodeType::kDecodedStream)) {
847     return;
848   }
849 
850   unsigned long decoded_data_size =
851       FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
852 
853   // Only continue if there actually is a thumbnail for this page
854   if (decoded_data_size == 0) {
855     fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
856             page_num + 1);
857     return;
858   }
859 
860   std::vector<uint8_t> thumb_buf(decoded_data_size);
861   if (FPDFPage_GetDecodedThumbnailData(
862           page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
863     fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
864     return;
865   }
866 
867   WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
868                     "decoded thumbnail");
869 }
870 
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)871 void WriteRawThumbnailStream(FPDF_PAGE page,
872                              const char* pdf_name,
873                              int page_num) {
874   char filename[256];
875   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
876                             ThumbnailDecodeType::kRawStream)) {
877     return;
878   }
879 
880   unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
881 
882   // Only continue if there actually is a thumbnail for this page
883   if (raw_data_size == 0) {
884     fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
885             page_num + 1);
886     return;
887   }
888 
889   std::vector<uint8_t> thumb_buf(raw_data_size);
890   if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
891       raw_data_size) {
892     fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
893     return;
894   }
895 
896   WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
897 }
898 
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)899 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
900   char filename[256];
901   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
902                             ThumbnailDecodeType::kBitmap)) {
903     return;
904   }
905 
906   ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
907   if (!thumb_bitmap) {
908     fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
909             page_num + 1);
910     return;
911   }
912 
913   std::vector<uint8_t> png_encoding =
914       EncodeBitmapToPng(std::move(thumb_bitmap));
915   if (png_encoding.empty()) {
916     fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
917             page_num + 1);
918     return;
919   }
920 
921   WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
922                     "thumbnail");
923 }
924