• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "samples/pdfium_test_write_helper.h"
6 
7 #include <limits.h>
8 
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/notreached.h"
22 
23 #ifdef PDF_ENABLE_SKIA
24 #include "third_party/skia/include/core/SkPicture.h"  // nogncheck
25 #include "third_party/skia/include/core/SkStream.h"   // nogncheck
26 #endif
27 
28 namespace {
29 
// Returns true when |stride|, |width| and |height| are all non-negative and
// |stride| * |height| fits in an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool any_negative = stride < 0 || width < 0 || height < 0;
  if (any_negative)
    return false;

  // A zero height makes the product zero, so only positive heights need the
  // division-based overflow test.
  const bool product_overflows = height > 0 && stride > INT_MAX / height;
  return !product_overflows;
}
37 
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)38 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
39   if (subtype == FPDF_ANNOT_TEXT)
40     return "Text";
41   if (subtype == FPDF_ANNOT_LINK)
42     return "Link";
43   if (subtype == FPDF_ANNOT_FREETEXT)
44     return "FreeText";
45   if (subtype == FPDF_ANNOT_LINE)
46     return "Line";
47   if (subtype == FPDF_ANNOT_SQUARE)
48     return "Square";
49   if (subtype == FPDF_ANNOT_CIRCLE)
50     return "Circle";
51   if (subtype == FPDF_ANNOT_POLYGON)
52     return "Polygon";
53   if (subtype == FPDF_ANNOT_POLYLINE)
54     return "PolyLine";
55   if (subtype == FPDF_ANNOT_HIGHLIGHT)
56     return "Highlight";
57   if (subtype == FPDF_ANNOT_UNDERLINE)
58     return "Underline";
59   if (subtype == FPDF_ANNOT_SQUIGGLY)
60     return "Squiggly";
61   if (subtype == FPDF_ANNOT_STRIKEOUT)
62     return "StrikeOut";
63   if (subtype == FPDF_ANNOT_STAMP)
64     return "Stamp";
65   if (subtype == FPDF_ANNOT_CARET)
66     return "Caret";
67   if (subtype == FPDF_ANNOT_INK)
68     return "Ink";
69   if (subtype == FPDF_ANNOT_POPUP)
70     return "Popup";
71   if (subtype == FPDF_ANNOT_FILEATTACHMENT)
72     return "FileAttachment";
73   if (subtype == FPDF_ANNOT_SOUND)
74     return "Sound";
75   if (subtype == FPDF_ANNOT_MOVIE)
76     return "Movie";
77   if (subtype == FPDF_ANNOT_WIDGET)
78     return "Widget";
79   if (subtype == FPDF_ANNOT_SCREEN)
80     return "Screen";
81   if (subtype == FPDF_ANNOT_PRINTERMARK)
82     return "PrinterMark";
83   if (subtype == FPDF_ANNOT_TRAPNET)
84     return "TrapNet";
85   if (subtype == FPDF_ANNOT_WATERMARK)
86     return "Watermark";
87   if (subtype == FPDF_ANNOT_THREED)
88     return "3D";
89   if (subtype == FPDF_ANNOT_RICHMEDIA)
90     return "RichMedia";
91   if (subtype == FPDF_ANNOT_XFAWIDGET)
92     return "XFAWidget";
93   NOTREACHED();
94   return "";
95 }
96 
// Appends |flag| to |output|, preceded by ", " when |output| already holds
// text.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
102 
AnnotFlagsToString(int flags)103 std::string AnnotFlagsToString(int flags) {
104   std::string str;
105   if (flags & FPDF_ANNOT_FLAG_INVISIBLE)
106     AppendFlagString("Invisible", &str);
107   if (flags & FPDF_ANNOT_FLAG_HIDDEN)
108     AppendFlagString("Hidden", &str);
109   if (flags & FPDF_ANNOT_FLAG_PRINT)
110     AppendFlagString("Print", &str);
111   if (flags & FPDF_ANNOT_FLAG_NOZOOM)
112     AppendFlagString("NoZoom", &str);
113   if (flags & FPDF_ANNOT_FLAG_NOROTATE)
114     AppendFlagString("NoRotate", &str);
115   if (flags & FPDF_ANNOT_FLAG_NOVIEW)
116     AppendFlagString("NoView", &str);
117   if (flags & FPDF_ANNOT_FLAG_READONLY)
118     AppendFlagString("ReadOnly", &str);
119   if (flags & FPDF_ANNOT_FLAG_LOCKED)
120     AppendFlagString("Locked", &str);
121   if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW)
122     AppendFlagString("ToggleNoView", &str);
123   return str;
124 }
125 
PageObjectTypeToCString(int type)126 const char* PageObjectTypeToCString(int type) {
127   if (type == FPDF_PAGEOBJ_TEXT)
128     return "Text";
129   if (type == FPDF_PAGEOBJ_PATH)
130     return "Path";
131   if (type == FPDF_PAGEOBJ_IMAGE)
132     return "Image";
133   if (type == FPDF_PAGEOBJ_SHADING)
134     return "Shading";
135   if (type == FPDF_PAGEOBJ_FORM)
136     return "Form";
137   NOTREACHED();
138   return "";
139 }
140 
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)141 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
142                                int width,
143                                int height,
144                                int stride,
145                                int format) {
146   std::vector<uint8_t> png;
147   switch (format) {
148     case FPDFBitmap_Unknown:
149       break;
150     case FPDFBitmap_Gray:
151       png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
152       break;
153     case FPDFBitmap_BGR:
154       png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
155       break;
156     case FPDFBitmap_BGRx:
157       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
158                                           /*discard_transparency=*/true);
159       break;
160     case FPDFBitmap_BGRA:
161       png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
162                                           /*discard_transparency=*/false);
163       break;
164     default:
165       NOTREACHED();
166   }
167   return png;
168 }
169 
170 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)171 int CALLBACK EnhMetaFileProc(HDC hdc,
172                              HANDLETABLE* handle_table,
173                              const ENHMETARECORD* record,
174                              int objects_count,
175                              LPARAM param) {
176   std::vector<const ENHMETARECORD*>& items =
177       *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
178   items.push_back(record);
179   return 1;
180 }
181 #endif  // _WIN32
182 
// Builds "<pdf_name>.<page_num>.<extension>". Returns an empty string, after
// logging to stderr, when the result is 256 characters or longer.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  std::ostringstream name_builder;
  name_builder << pdf_name << '.' << page_num << '.' << extension;

  std::string result = name_builder.str();
  if (result.size() < 256)
    return result;

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
196 
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". Returns an empty
// string, after logging to stderr, when the result is 256 characters or
// longer.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  std::ostringstream name_builder;
  name_builder << pdf_name << '.' << page_num << '.' << image_num << '.'
               << extension;

  std::string result = name_builder.str();
  if (result.size() < 256)
    return result;

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
212 
213 }  // namespace
214 
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)215 std::string WritePpm(const char* pdf_name,
216                      int num,
217                      void* buffer_void,
218                      int stride,
219                      int width,
220                      int height) {
221   if (!CheckDimensions(stride, width, height)) {
222     return "";
223   }
224 
225   int out_len = width * height;
226   if (out_len > INT_MAX / 3) {
227     return "";
228   }
229 
230   out_len *= 3;
231 
232   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
233   if (filename.empty()) {
234     return std::string();
235   }
236   FILE* fp = fopen(filename.c_str(), "wb");
237   if (!fp) {
238     return std::string();
239   }
240 
241   fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
242   // Source data is B, G, R, unused.
243   // Dest data is R, G, B.
244   const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
245   std::vector<uint8_t> result(out_len);
246   for (int h = 0; h < height; ++h) {
247     const uint8_t* src_line = buffer + (stride * h);
248     uint8_t* dest_line = result.data() + (width * h * 3);
249     for (int w = 0; w < width; ++w) {
250       // R
251       dest_line[w * 3] = src_line[(w * 4) + 2];
252       // G
253       dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
254       // B
255       dest_line[(w * 3) + 2] = src_line[w * 4];
256     }
257   }
258   if (fwrite(result.data(), out_len, 1, fp) != 1) {
259     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
260   }
261 
262   fclose(fp);
263   return filename;
264 }
265 
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)266 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
267   std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
268   if (filename.empty()) {
269     return;
270   }
271   FILE* fp = fopen(filename.c_str(), "w");
272   if (!fp) {
273     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
274     return;
275   }
276 
277   // Output in UTF32-LE.
278   uint32_t bom = 0x0000FEFF;
279   if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
280     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
281     (void)fclose(fp);
282     return;
283   }
284 
285   for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
286     uint32_t c = FPDFText_GetUnicode(textpage, i);
287     if (fwrite(&c, sizeof(c), 1, fp) != 1) {
288       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
289       break;
290     }
291   }
292   (void)fclose(fp);
293 }
294 
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)295 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
296   // Open the output text file.
297   std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
298   if (filename.empty()) {
299     return;
300   }
301   FILE* fp = fopen(filename.c_str(), "w");
302   if (!fp) {
303     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
304     return;
305   }
306 
307   int annot_count = FPDFPage_GetAnnotCount(page);
308   fprintf(fp, "Number of annotations: %d\n\n", annot_count);
309 
310   // Iterate through all annotations on this page.
311   for (int i = 0; i < annot_count; ++i) {
312     // Retrieve the annotation object and its subtype.
313     fprintf(fp, "Annotation #%d:\n", i + 1);
314     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
315     if (!annot) {
316       fprintf(fp, "Failed to retrieve annotation!\n\n");
317       continue;
318     }
319 
320     FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
321     fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
322 
323     // Retrieve the annotation flags.
324     fprintf(fp, "Flags set: %s\n",
325             AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
326 
327     // Retrieve the annotation's object count and object types.
328     const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
329     fprintf(fp, "Number of objects: %d\n", obj_count);
330     if (obj_count > 0) {
331       fprintf(fp, "Object types: ");
332       for (int j = 0; j < obj_count; ++j) {
333         const char* type = PageObjectTypeToCString(
334             FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
335         fprintf(fp, "%s  ", type);
336       }
337       fprintf(fp, "\n");
338     }
339 
340     // Retrieve the annotation's color and interior color.
341     unsigned int R;
342     unsigned int G;
343     unsigned int B;
344     unsigned int A;
345     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
346                            &A)) {
347       fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
348     } else {
349       fprintf(fp, "Failed to retrieve color.\n");
350     }
351     if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
352                            &G, &B, &A)) {
353       fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
354     } else {
355       fprintf(fp, "Failed to retrieve interior color.\n");
356     }
357 
358     // Retrieve the annotation's contents and author.
359     static constexpr char kContentsKey[] = "Contents";
360     static constexpr char kAuthorKey[] = "T";
361     unsigned long length_bytes =
362         FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
363     std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
364     FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
365                              length_bytes);
366     fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
367     length_bytes =
368         FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
369     buf = GetFPDFWideStringBuffer(length_bytes);
370     FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
371     fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
372 
373     // Retrieve the annotation's quadpoints if it is a markup annotation.
374     if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
375       size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
376       fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
377 
378       // Iterate through all quadpoints of the current annotation
379       for (size_t j = 0; j < qp_count; ++j) {
380         FS_QUADPOINTSF quadpoints;
381         if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
382           fprintf(fp,
383                   "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
384                   "(%.3f, %.3f), (%.3f, %.3f)\n",
385                   j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
386                   quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
387                   quadpoints.y4);
388         } else {
389           fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
390         }
391       }
392     }
393 
394     // Retrieve the annotation's rectangle coordinates.
395     FS_RECTF rect;
396     if (FPDFAnnot_GetRect(annot.get(), &rect)) {
397       fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
398               rect.left, rect.bottom, rect.right, rect.top);
399     } else {
400       fprintf(fp, "Failed to retrieve annotation rectangle.\n");
401     }
402   }
403 
404   (void)fclose(fp);
405 }
406 
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)407 std::string WritePng(const char* pdf_name,
408                      int num,
409                      void* buffer,
410                      int stride,
411                      int width,
412                      int height) {
413   if (!CheckDimensions(stride, width, height)) {
414     return "";
415   }
416 
417   auto input =
418       pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
419   std::vector<uint8_t> png_encoding =
420       EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
421   if (png_encoding.empty()) {
422     fprintf(stderr, "Failed to convert bitmap to PNG\n");
423     return "";
424   }
425 
426   std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
427   if (filename.empty()) {
428     return std::string();
429   }
430   FILE* fp = fopen(filename.c_str(), "wb");
431   if (!fp) {
432     fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
433     return std::string();
434   }
435 
436   size_t bytes_written =
437       fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
438   if (bytes_written != png_encoding.size()) {
439     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
440   }
441 
442   (void)fclose(fp);
443   return filename;
444 }
445 
446 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)447 std::string WriteBmp(const char* pdf_name,
448                      int num,
449                      void* buffer,
450                      int stride,
451                      int width,
452                      int height) {
453   if (!CheckDimensions(stride, width, height)) {
454     return std::string();
455   }
456 
457   int out_len = stride * height;
458   if (out_len > INT_MAX / 3) {
459     return std::string();
460   }
461 
462   std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
463   if (filename.empty()) {
464     return std::string();
465   }
466   FILE* fp = fopen(filename.c_str(), "wb");
467   if (!fp) {
468     return std::string();
469   }
470 
471   BITMAPINFO bmi = {};
472   bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
473   bmi.bmiHeader.biWidth = width;
474   bmi.bmiHeader.biHeight = -height;  // top-down image
475   bmi.bmiHeader.biPlanes = 1;
476   bmi.bmiHeader.biBitCount = 32;
477   bmi.bmiHeader.biCompression = BI_RGB;
478   bmi.bmiHeader.biSizeImage = 0;
479 
480   BITMAPFILEHEADER file_header = {};
481   file_header.bfType = 0x4d42;
482   file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
483   file_header.bfOffBits = file_header.bfSize - out_len;
484 
485   if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
486       fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
487       fwrite(buffer, out_len, 1, fp) != 1) {
488     fprintf(stderr, "Failed to write to %s\n", filename.c_str());
489   }
490   fclose(fp);
491   return filename;
492 }
493 
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)494 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
495   std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
496   if (filename.empty()) {
497     return;
498   }
499 
500   HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
501 
502   int width = static_cast<int>(FPDF_GetPageWidthF(page));
503   int height = static_cast<int>(FPDF_GetPageHeightF(page));
504   HRGN rgn = CreateRectRgn(0, 0, width, height);
505   SelectClipRgn(dc, rgn);
506   DeleteObject(rgn);
507 
508   SelectObject(dc, GetStockObject(NULL_PEN));
509   SelectObject(dc, GetStockObject(WHITE_BRUSH));
510   // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
511   Rectangle(dc, 0, 0, width + 1, height + 1);
512 
513   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
514 
515   DeleteEnhMetaFile(CloseEnhMetaFile(dc));
516 }
517 
WritePS(FPDF_PAGE page,const char * pdf_name,int num)518 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
519   std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
520   if (filename.empty()) {
521     return;
522   }
523   FILE* fp = fopen(filename.c_str(), "wb");
524   if (!fp)
525     return;
526 
527   HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
528 
529   int width = static_cast<int>(FPDF_GetPageWidthF(page));
530   int height = static_cast<int>(FPDF_GetPageHeightF(page));
531   FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
532 
533   HENHMETAFILE emf = CloseEnhMetaFile(dc);
534   std::vector<const ENHMETARECORD*> items;
535   EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
536   for (const ENHMETARECORD* record : items) {
537     if (record->iType != EMR_GDICOMMENT)
538       continue;
539 
540     const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
541     const char* data = reinterpret_cast<const char*>(comment->Data);
542     uint16_t size = *reinterpret_cast<const uint16_t*>(data);
543     if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
544       fprintf(stderr, "Failed to write to %s\n", filename.c_str());
545       break;
546     }
547   }
548   fclose(fp);
549   DeleteEnhMetaFile(emf);
550 }
551 #endif  // _WIN32
552 
553 #ifdef PDF_ENABLE_SKIA
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)554 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
555   std::string filename = GeneratePageOutputFilename(pdf_name, num, "skp");
556   if (filename.empty()) {
557     return filename;
558   }
559   SkFILEWStream wStream(filename.c_str());
560   picture.serialize(&wStream);
561   return filename;
562 }
563 #endif
564 
// Selects which thumbnail representation an output filename is built for.
enum class ThumbnailDecodeType {
  kBitmap,        // Named "<pdf>.thumbnail.<page>.png".
  kRawStream,     // Named "<pdf>.thumbnail.raw.<page>.bin".
  kDecodedStream  // Named "<pdf>.thumbnail.decoded.<page>.bin".
};
566 
// Formats the thumbnail output filename for |pdf_name| / |page_num| into
// |name_buf| (capacity |name_buf_size| bytes, including the terminator). The
// naming pattern depends on |decode_type|.
//
// Returns true on success. Returns false, after logging to stderr, when the
// name does not fit, formatting fails, the buffer has no capacity, or
// |decode_type| is not a known enumerator.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  // Stays null if |decode_type| holds a value outside the enumerators, so it
  // is never read uninitialized.
  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format || name_buf_size == 0) {
    fprintf(stderr, "Cannot build thumbnail filename for %s.\n", pdf_name);
    return false;
  }

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0) {
    // |name_buf| contents are unspecified after a formatting error, so do not
    // print them.
    fprintf(stderr, "Failed to format thumbnail filename for %s.\n", pdf_name);
    return false;
  }
  if (static_cast<size_t>(chars_formatted) >= name_buf_size) {
    // snprintf() truncated but still terminated the buffer.
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
595 
// Writes |buflen| bytes from |buf| to |filename|, creating or truncating the
// file. |filetype| is only used in the status messages. The outcome (success
// or failure) is reported on stderr.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    // Terminate the line like every other diagnostic so later messages do not
    // run together with this one.
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  // Counting single bytes makes a zero-length buffer a successful write.
  size_t bytes_written = fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen)
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  else
    fprintf(stderr, "Failed to write to %s.\n", filename);
  fclose(fp);
}
613 
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)614 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
615   std::vector<uint8_t> png_encoding;
616   int format = FPDFBitmap_GetFormat(bitmap.get());
617   if (format == FPDFBitmap_Unknown)
618     return png_encoding;
619 
620   int width = FPDFBitmap_GetWidth(bitmap.get());
621   int height = FPDFBitmap_GetHeight(bitmap.get());
622   int stride = FPDFBitmap_GetStride(bitmap.get());
623   if (!CheckDimensions(stride, width, height))
624     return png_encoding;
625 
626   auto input = pdfium::make_span(
627       static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
628       stride * height);
629 
630   png_encoding = EncodePng(input, width, height, stride, format);
631   return png_encoding;
632 }
633 
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)634 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
635   for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
636     FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
637 
638     // Retrieve the attachment file name.
639     std::string attachment_name;
640     unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
641     if (length_bytes) {
642       std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
643       unsigned long actual_length_bytes =
644           FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
645       if (actual_length_bytes == length_bytes)
646         attachment_name = GetPlatformString(buf.data());
647     }
648     if (attachment_name.empty()) {
649       fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
650       continue;
651     }
652 
653     // Calculate the full attachment file name.
654     char save_name[256];
655     int chars_formatted =
656         snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
657                  attachment_name.c_str());
658     if (chars_formatted < 0 ||
659         static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
660       fprintf(stderr, "Filename %s is too long.\n", save_name);
661       continue;
662     }
663 
664     // Retrieve the attachment.
665     if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
666       fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
667               attachment_name.c_str());
668       continue;
669     }
670 
671     std::vector<char> data_buf(length_bytes);
672     if (length_bytes) {
673       unsigned long actual_length_bytes;
674       if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
675                                   &actual_length_bytes)) {
676         fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
677                 attachment_name.c_str());
678         continue;
679       }
680     }
681 
682     // Write the attachment file. Since a PDF document could have 0-byte files
683     // as attachments, we should allow saving the 0-byte attachments to files.
684     WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
685   }
686 }
687 
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)688 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
689   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
690     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
691     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
692       continue;
693     }
694 
695     ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
696     if (!bitmap) {
697       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
698               i + 1, page_num + 1);
699       continue;
700     }
701 
702     std::string filename =
703         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
704     if (filename.empty()) {
705       continue;
706     }
707 
708     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
709     if (png_encoding.empty()) {
710       fprintf(stderr,
711               "Failed to convert image object #%d, on page #%d to png.\n",
712               i + 1, page_num + 1);
713       continue;
714     }
715 
716     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
717                       filename.c_str(), "image");
718   }
719 }
720 
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)721 void WriteRenderedImages(FPDF_DOCUMENT doc,
722                          FPDF_PAGE page,
723                          const char* pdf_name,
724                          int page_num) {
725   for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
726     FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
727     if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
728       continue;
729     }
730 
731     ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
732     if (!bitmap) {
733       fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
734               i + 1, page_num + 1);
735       continue;
736     }
737 
738     std::string filename =
739         GenerateImageOutputFilename(pdf_name, page_num, i, "png");
740     if (filename.empty()) {
741       continue;
742     }
743 
744     std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
745     if (png_encoding.empty()) {
746       fprintf(stderr,
747               "Failed to convert image object #%d, on page #%d to png.\n",
748               i + 1, page_num + 1);
749       continue;
750     }
751 
752     WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
753                       filename.c_str(), "image");
754   }
755 }
756 
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)757 void WriteDecodedThumbnailStream(FPDF_PAGE page,
758                                  const char* pdf_name,
759                                  int page_num) {
760   char filename[256];
761   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
762                             ThumbnailDecodeType::kDecodedStream)) {
763     return;
764   }
765 
766   unsigned long decoded_data_size =
767       FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
768 
769   // Only continue if there actually is a thumbnail for this page
770   if (decoded_data_size == 0) {
771     fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
772             page_num + 1);
773     return;
774   }
775 
776   std::vector<uint8_t> thumb_buf(decoded_data_size);
777   if (FPDFPage_GetDecodedThumbnailData(
778           page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
779     fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
780     return;
781   }
782 
783   WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
784                     "decoded thumbnail");
785 }
786 
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)787 void WriteRawThumbnailStream(FPDF_PAGE page,
788                              const char* pdf_name,
789                              int page_num) {
790   char filename[256];
791   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
792                             ThumbnailDecodeType::kRawStream)) {
793     return;
794   }
795 
796   unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
797 
798   // Only continue if there actually is a thumbnail for this page
799   if (raw_data_size == 0) {
800     fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
801             page_num + 1);
802     return;
803   }
804 
805   std::vector<uint8_t> thumb_buf(raw_data_size);
806   if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
807       raw_data_size) {
808     fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
809     return;
810   }
811 
812   WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
813 }
814 
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)815 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
816   char filename[256];
817   if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
818                             ThumbnailDecodeType::kBitmap)) {
819     return;
820   }
821 
822   ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
823   if (!thumb_bitmap) {
824     fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
825             page_num + 1);
826     return;
827   }
828 
829   std::vector<uint8_t> png_encoding =
830       EncodeBitmapToPng(std::move(thumb_bitmap));
831   if (png_encoding.empty()) {
832     fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
833             page_num + 1);
834     return;
835   }
836 
837   WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
838                     "thumbnail");
839 }
840