1 // Copyright 2018 The PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "samples/pdfium_test_write_helper.h"
6
7 #include <limits.h>
8
9 #include <memory>
10 #include <string>
11 #include <utility>
12 #include <vector>
13
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/logging.h"
22
23 namespace {
24
// Returns true when |stride|, |width| and |height| are all non-negative and
// the product |stride| * |height| fits in an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool all_non_negative = stride >= 0 && width >= 0 && height >= 0;
  if (!all_non_negative)
    return false;

  // With zero rows there is no product that could overflow.
  if (height == 0)
    return true;

  // Compare against the quotient so the product itself is never evaluated.
  return stride <= INT_MAX / height;
}
32
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)33 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
34 if (subtype == FPDF_ANNOT_TEXT)
35 return "Text";
36 if (subtype == FPDF_ANNOT_LINK)
37 return "Link";
38 if (subtype == FPDF_ANNOT_FREETEXT)
39 return "FreeText";
40 if (subtype == FPDF_ANNOT_LINE)
41 return "Line";
42 if (subtype == FPDF_ANNOT_SQUARE)
43 return "Square";
44 if (subtype == FPDF_ANNOT_CIRCLE)
45 return "Circle";
46 if (subtype == FPDF_ANNOT_POLYGON)
47 return "Polygon";
48 if (subtype == FPDF_ANNOT_POLYLINE)
49 return "PolyLine";
50 if (subtype == FPDF_ANNOT_HIGHLIGHT)
51 return "Highlight";
52 if (subtype == FPDF_ANNOT_UNDERLINE)
53 return "Underline";
54 if (subtype == FPDF_ANNOT_SQUIGGLY)
55 return "Squiggly";
56 if (subtype == FPDF_ANNOT_STRIKEOUT)
57 return "StrikeOut";
58 if (subtype == FPDF_ANNOT_STAMP)
59 return "Stamp";
60 if (subtype == FPDF_ANNOT_CARET)
61 return "Caret";
62 if (subtype == FPDF_ANNOT_INK)
63 return "Ink";
64 if (subtype == FPDF_ANNOT_POPUP)
65 return "Popup";
66 if (subtype == FPDF_ANNOT_FILEATTACHMENT)
67 return "FileAttachment";
68 if (subtype == FPDF_ANNOT_SOUND)
69 return "Sound";
70 if (subtype == FPDF_ANNOT_MOVIE)
71 return "Movie";
72 if (subtype == FPDF_ANNOT_WIDGET)
73 return "Widget";
74 if (subtype == FPDF_ANNOT_SCREEN)
75 return "Screen";
76 if (subtype == FPDF_ANNOT_PRINTERMARK)
77 return "PrinterMark";
78 if (subtype == FPDF_ANNOT_TRAPNET)
79 return "TrapNet";
80 if (subtype == FPDF_ANNOT_WATERMARK)
81 return "Watermark";
82 if (subtype == FPDF_ANNOT_THREED)
83 return "3D";
84 if (subtype == FPDF_ANNOT_RICHMEDIA)
85 return "RichMedia";
86 if (subtype == FPDF_ANNOT_XFAWIDGET)
87 return "XFAWidget";
88 NOTREACHED();
89 return "";
90 }
91
// Appends |flag| to |output|, preceded by ", " when |output| already holds
// text.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
97
AnnotFlagsToString(int flags)98 std::string AnnotFlagsToString(int flags) {
99 std::string str;
100 if (flags & FPDF_ANNOT_FLAG_INVISIBLE)
101 AppendFlagString("Invisible", &str);
102 if (flags & FPDF_ANNOT_FLAG_HIDDEN)
103 AppendFlagString("Hidden", &str);
104 if (flags & FPDF_ANNOT_FLAG_PRINT)
105 AppendFlagString("Print", &str);
106 if (flags & FPDF_ANNOT_FLAG_NOZOOM)
107 AppendFlagString("NoZoom", &str);
108 if (flags & FPDF_ANNOT_FLAG_NOROTATE)
109 AppendFlagString("NoRotate", &str);
110 if (flags & FPDF_ANNOT_FLAG_NOVIEW)
111 AppendFlagString("NoView", &str);
112 if (flags & FPDF_ANNOT_FLAG_READONLY)
113 AppendFlagString("ReadOnly", &str);
114 if (flags & FPDF_ANNOT_FLAG_LOCKED)
115 AppendFlagString("Locked", &str);
116 if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW)
117 AppendFlagString("ToggleNoView", &str);
118 return str;
119 }
120
PageObjectTypeToCString(int type)121 const char* PageObjectTypeToCString(int type) {
122 if (type == FPDF_PAGEOBJ_TEXT)
123 return "Text";
124 if (type == FPDF_PAGEOBJ_PATH)
125 return "Path";
126 if (type == FPDF_PAGEOBJ_IMAGE)
127 return "Image";
128 if (type == FPDF_PAGEOBJ_SHADING)
129 return "Shading";
130 if (type == FPDF_PAGEOBJ_FORM)
131 return "Form";
132 NOTREACHED();
133 return "";
134 }
135
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)136 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
137 int width,
138 int height,
139 int stride,
140 int format) {
141 std::vector<uint8_t> png;
142 switch (format) {
143 case FPDFBitmap_Unknown:
144 break;
145 case FPDFBitmap_Gray:
146 png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
147 break;
148 case FPDFBitmap_BGR:
149 png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
150 break;
151 case FPDFBitmap_BGRx:
152 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
153 /*discard_transparency=*/true);
154 break;
155 case FPDFBitmap_BGRA:
156 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
157 /*discard_transparency=*/false);
158 break;
159 default:
160 NOTREACHED();
161 }
162 return png;
163 }
164
165 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)166 int CALLBACK EnhMetaFileProc(HDC hdc,
167 HANDLETABLE* handle_table,
168 const ENHMETARECORD* record,
169 int objects_count,
170 LPARAM param) {
171 std::vector<const ENHMETARECORD*>& items =
172 *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
173 items.push_back(record);
174 return 1;
175 }
176 #endif // _WIN32
177
178 } // namespace
179
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)180 std::string WritePpm(const char* pdf_name,
181 int num,
182 void* buffer_void,
183 int stride,
184 int width,
185 int height) {
186 if (!CheckDimensions(stride, width, height))
187 return "";
188
189 int out_len = width * height;
190 if (out_len > INT_MAX / 3)
191 return "";
192
193 out_len *= 3;
194
195 char filename[256];
196 snprintf(filename, sizeof(filename), "%s.%d.ppm", pdf_name, num);
197 FILE* fp = fopen(filename, "wb");
198 if (!fp)
199 return "";
200
201 fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
202 // Source data is B, G, R, unused.
203 // Dest data is R, G, B.
204 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
205 std::vector<uint8_t> result(out_len);
206 for (int h = 0; h < height; ++h) {
207 const uint8_t* src_line = buffer + (stride * h);
208 uint8_t* dest_line = result.data() + (width * h * 3);
209 for (int w = 0; w < width; ++w) {
210 // R
211 dest_line[w * 3] = src_line[(w * 4) + 2];
212 // G
213 dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
214 // B
215 dest_line[(w * 3) + 2] = src_line[w * 4];
216 }
217 }
218 if (fwrite(result.data(), out_len, 1, fp) != 1)
219 fprintf(stderr, "Failed to write to %s\n", filename);
220
221 fclose(fp);
222 return std::string(filename);
223 }
224
WriteText(FPDF_PAGE page,const char * pdf_name,int num)225 void WriteText(FPDF_PAGE page, const char* pdf_name, int num) {
226 char filename[256];
227 int chars_formatted =
228 snprintf(filename, sizeof(filename), "%s.%d.txt", pdf_name, num);
229 if (chars_formatted < 0 ||
230 static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
231 fprintf(stderr, "Filename %s is too long\n", filename);
232 return;
233 }
234
235 FILE* fp = fopen(filename, "w");
236 if (!fp) {
237 fprintf(stderr, "Failed to open %s for output\n", filename);
238 return;
239 }
240
241 // Output in UTF32-LE.
242 uint32_t bom = 0x0000FEFF;
243 if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
244 fprintf(stderr, "Failed to write to %s\n", filename);
245 (void)fclose(fp);
246 return;
247 }
248
249 ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
250 for (int i = 0; i < FPDFText_CountChars(textpage.get()); i++) {
251 uint32_t c = FPDFText_GetUnicode(textpage.get(), i);
252 if (fwrite(&c, sizeof(c), 1, fp) != 1) {
253 fprintf(stderr, "Failed to write to %s\n", filename);
254 break;
255 }
256 }
257 (void)fclose(fp);
258 }
259
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)260 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
261 // Open the output text file.
262 char filename[256];
263 int chars_formatted =
264 snprintf(filename, sizeof(filename), "%s.%d.annot.txt", pdf_name, num);
265 if (chars_formatted < 0 ||
266 static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
267 fprintf(stderr, "Filename %s is too long\n", filename);
268 return;
269 }
270
271 FILE* fp = fopen(filename, "w");
272 if (!fp) {
273 fprintf(stderr, "Failed to open %s for output\n", filename);
274 return;
275 }
276
277 int annot_count = FPDFPage_GetAnnotCount(page);
278 fprintf(fp, "Number of annotations: %d\n\n", annot_count);
279
280 // Iterate through all annotations on this page.
281 for (int i = 0; i < annot_count; ++i) {
282 // Retrieve the annotation object and its subtype.
283 fprintf(fp, "Annotation #%d:\n", i + 1);
284 ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
285 if (!annot) {
286 fprintf(fp, "Failed to retrieve annotation!\n\n");
287 continue;
288 }
289
290 FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
291 fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
292
293 // Retrieve the annotation flags.
294 fprintf(fp, "Flags set: %s\n",
295 AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
296
297 // Retrieve the annotation's object count and object types.
298 const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
299 fprintf(fp, "Number of objects: %d\n", obj_count);
300 if (obj_count > 0) {
301 fprintf(fp, "Object types: ");
302 for (int j = 0; j < obj_count; ++j) {
303 const char* type = PageObjectTypeToCString(
304 FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
305 fprintf(fp, "%s ", type);
306 }
307 fprintf(fp, "\n");
308 }
309
310 // Retrieve the annotation's color and interior color.
311 unsigned int R;
312 unsigned int G;
313 unsigned int B;
314 unsigned int A;
315 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
316 &A)) {
317 fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
318 } else {
319 fprintf(fp, "Failed to retrieve color.\n");
320 }
321 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
322 &G, &B, &A)) {
323 fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
324 } else {
325 fprintf(fp, "Failed to retrieve interior color.\n");
326 }
327
328 // Retrieve the annotation's contents and author.
329 static constexpr char kContentsKey[] = "Contents";
330 static constexpr char kAuthorKey[] = "T";
331 unsigned long length_bytes =
332 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
333 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
334 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
335 length_bytes);
336 fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
337 length_bytes =
338 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
339 buf = GetFPDFWideStringBuffer(length_bytes);
340 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
341 fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
342
343 // Retrieve the annotation's quadpoints if it is a markup annotation.
344 if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
345 size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
346 fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
347
348 // Iterate through all quadpoints of the current annotation
349 for (size_t j = 0; j < qp_count; ++j) {
350 FS_QUADPOINTSF quadpoints;
351 if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
352 fprintf(fp,
353 "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
354 "(%.3f, %.3f), (%.3f, %.3f)\n",
355 j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
356 quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
357 quadpoints.y4);
358 } else {
359 fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
360 }
361 }
362 }
363
364 // Retrieve the annotation's rectangle coordinates.
365 FS_RECTF rect;
366 if (FPDFAnnot_GetRect(annot.get(), &rect)) {
367 fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
368 rect.left, rect.bottom, rect.right, rect.top);
369 } else {
370 fprintf(fp, "Failed to retrieve annotation rectangle.\n");
371 }
372 }
373
374 (void)fclose(fp);
375 }
376
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)377 std::string WritePng(const char* pdf_name,
378 int num,
379 void* buffer,
380 int stride,
381 int width,
382 int height) {
383 if (!CheckDimensions(stride, width, height))
384 return "";
385
386 auto input =
387 pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
388 std::vector<uint8_t> png_encoding =
389 EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
390 if (png_encoding.empty()) {
391 fprintf(stderr, "Failed to convert bitmap to PNG\n");
392 return "";
393 }
394
395 char filename[256];
396 int chars_formatted =
397 snprintf(filename, sizeof(filename), "%s.%d.png", pdf_name, num);
398 if (chars_formatted < 0 ||
399 static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
400 fprintf(stderr, "Filename %s is too long\n", filename);
401 return "";
402 }
403
404 FILE* fp = fopen(filename, "wb");
405 if (!fp) {
406 fprintf(stderr, "Failed to open %s for output\n", filename);
407 return "";
408 }
409
410 size_t bytes_written =
411 fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
412 if (bytes_written != png_encoding.size())
413 fprintf(stderr, "Failed to write to %s\n", filename);
414
415 (void)fclose(fp);
416 return std::string(filename);
417 }
418
419 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)420 std::string WriteBmp(const char* pdf_name,
421 int num,
422 void* buffer,
423 int stride,
424 int width,
425 int height) {
426 if (!CheckDimensions(stride, width, height))
427 return "";
428
429 int out_len = stride * height;
430 if (out_len > INT_MAX / 3)
431 return "";
432
433 char filename[256];
434 snprintf(filename, sizeof(filename), "%s.%d.bmp", pdf_name, num);
435 FILE* fp = fopen(filename, "wb");
436 if (!fp)
437 return "";
438
439 BITMAPINFO bmi = {};
440 bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
441 bmi.bmiHeader.biWidth = width;
442 bmi.bmiHeader.biHeight = -height; // top-down image
443 bmi.bmiHeader.biPlanes = 1;
444 bmi.bmiHeader.biBitCount = 32;
445 bmi.bmiHeader.biCompression = BI_RGB;
446 bmi.bmiHeader.biSizeImage = 0;
447
448 BITMAPFILEHEADER file_header = {};
449 file_header.bfType = 0x4d42;
450 file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
451 file_header.bfOffBits = file_header.bfSize - out_len;
452
453 if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
454 fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
455 fwrite(buffer, out_len, 1, fp) != 1) {
456 fprintf(stderr, "Failed to write to %s\n", filename);
457 }
458 fclose(fp);
459 return std::string(filename);
460 }
461
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)462 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
463 char filename[256];
464 snprintf(filename, sizeof(filename), "%s.%d.emf", pdf_name, num);
465
466 HDC dc = CreateEnhMetaFileA(nullptr, filename, nullptr, nullptr);
467
468 int width = static_cast<int>(FPDF_GetPageWidthF(page));
469 int height = static_cast<int>(FPDF_GetPageHeightF(page));
470 HRGN rgn = CreateRectRgn(0, 0, width, height);
471 SelectClipRgn(dc, rgn);
472 DeleteObject(rgn);
473
474 SelectObject(dc, GetStockObject(NULL_PEN));
475 SelectObject(dc, GetStockObject(WHITE_BRUSH));
476 // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
477 Rectangle(dc, 0, 0, width + 1, height + 1);
478
479 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
480
481 DeleteEnhMetaFile(CloseEnhMetaFile(dc));
482 }
483
WritePS(FPDF_PAGE page,const char * pdf_name,int num)484 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
485 char filename[256];
486 snprintf(filename, sizeof(filename), "%s.%d.ps", pdf_name, num);
487 FILE* fp = fopen(filename, "wb");
488 if (!fp)
489 return;
490
491 HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
492
493 int width = static_cast<int>(FPDF_GetPageWidthF(page));
494 int height = static_cast<int>(FPDF_GetPageHeightF(page));
495 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
496
497 HENHMETAFILE emf = CloseEnhMetaFile(dc);
498 std::vector<const ENHMETARECORD*> items;
499 EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
500 for (const ENHMETARECORD* record : items) {
501 if (record->iType != EMR_GDICOMMENT)
502 continue;
503
504 const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
505 const char* data = reinterpret_cast<const char*>(comment->Data);
506 uint16_t size = *reinterpret_cast<const uint16_t*>(data);
507 if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
508 fprintf(stderr, "Failed to write to %s\n", filename);
509 break;
510 }
511 }
512 fclose(fp);
513 DeleteEnhMetaFile(emf);
514 }
515 #endif // _WIN32
516
517 #ifdef PDF_ENABLE_SKIA
WriteSkp(const char * pdf_name,int num,SkPictureRecorder * recorder)518 std::string WriteSkp(const char* pdf_name,
519 int num,
520 SkPictureRecorder* recorder) {
521 char filename[256];
522 int chars_formatted =
523 snprintf(filename, sizeof(filename), "%s.%d.skp", pdf_name, num);
524
525 if (chars_formatted < 0 ||
526 static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
527 fprintf(stderr, "Filename %s is too long\n", filename);
528 return "";
529 }
530
531 sk_sp<SkPicture> picture(recorder->finishRecordingAsPicture());
532 SkFILEWStream wStream(filename);
533 picture->serialize(&wStream);
534 return std::string(filename);
535 }
536 #endif
537
// Selects which thumbnail output GetThumbnailFilename() names: the PNG
// bitmap, the raw stream data, or the decoded stream data.
enum class ThumbnailDecodeType { kBitmap, kRawStream, kDecodedStream };

// Formats the output file name for a page thumbnail into |name_buf|.
//
// The name is built from |pdf_name| and |page_num| using a pattern chosen by
// |decode_type|:
//   kBitmap        -> "<pdf_name>.thumbnail.<page_num>.png"
//   kDecodedStream -> "<pdf_name>.thumbnail.decoded.<page_num>.bin"
//   kRawStream     -> "<pdf_name>.thumbnail.raw.<page_num>.bin"
//
// Returns true on success. Returns false when |decode_type| is not one of
// the enumerators above, or (after reporting on stderr) when the name does
// not fit in |name_buf_size| bytes.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  // Stays null when |decode_type| matches no case, so that an unexpected
  // value is rejected instead of passing an indeterminate pointer to
  // snprintf().
  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format)
    return false;

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0 ||
      static_cast<size_t>(chars_formatted) >= name_buf_size) {
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
568
// Writes |buflen| bytes from |buf| to the file |filename|, replacing any
// existing content. |filetype| is a short description of the data used only
// in the messages. The outcome (open failure, success or short write) is
// reported on stderr, one line per message.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    // Ends with a newline like every other message, so that later output
    // does not run into it.
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  size_t bytes_written = fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen)
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  else
    fprintf(stderr, "Failed to write to %s.\n", filename);
  fclose(fp);
}
586
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)587 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
588 std::vector<uint8_t> png_encoding;
589 int format = FPDFBitmap_GetFormat(bitmap.get());
590 if (format == FPDFBitmap_Unknown)
591 return png_encoding;
592
593 int width = FPDFBitmap_GetWidth(bitmap.get());
594 int height = FPDFBitmap_GetHeight(bitmap.get());
595 int stride = FPDFBitmap_GetStride(bitmap.get());
596 if (!CheckDimensions(stride, width, height))
597 return png_encoding;
598
599 auto input = pdfium::make_span(
600 static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
601 stride * height);
602
603 png_encoding = EncodePng(input, width, height, stride, format);
604 return png_encoding;
605 }
606
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)607 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
608 for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
609 FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
610
611 // Retrieve the attachment file name.
612 std::string attachment_name;
613 unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
614 if (length_bytes) {
615 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
616 unsigned long actual_length_bytes =
617 FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
618 if (actual_length_bytes == length_bytes)
619 attachment_name = GetPlatformString(buf.data());
620 }
621 if (attachment_name.empty()) {
622 fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
623 continue;
624 }
625
626 // Calculate the full attachment file name.
627 char save_name[256];
628 int chars_formatted =
629 snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
630 attachment_name.c_str());
631 if (chars_formatted < 0 ||
632 static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
633 fprintf(stderr, "Filename %s is too long.\n", save_name);
634 continue;
635 }
636
637 // Retrieve the attachment.
638 length_bytes = FPDFAttachment_GetFile(attachment, nullptr, 0);
639 std::vector<char> data_buf(length_bytes);
640 if (length_bytes) {
641 unsigned long actual_length_bytes =
642 FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes);
643 if (actual_length_bytes != length_bytes)
644 data_buf.clear();
645 }
646 if (data_buf.empty()) {
647 fprintf(stderr, "Attachment \"%s\" is empty.\n", attachment_name.c_str());
648 continue;
649 }
650
651 // Write the attachment file.
652 WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
653 }
654 }
655
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)656 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
657 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
658 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
659 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE)
660 continue;
661
662 ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
663 if (!bitmap) {
664 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
665 i + 1, page_num + 1);
666 continue;
667 }
668
669 char filename[256];
670 int chars_formatted = snprintf(filename, sizeof(filename), "%s.%d.%d.png",
671 pdf_name, page_num, i);
672 if (chars_formatted < 0 ||
673 static_cast<size_t>(chars_formatted) >= sizeof(filename)) {
674 fprintf(stderr, "Filename %s for saving image is too long.\n", filename);
675 continue;
676 }
677
678 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
679 if (png_encoding.empty()) {
680 fprintf(stderr,
681 "Failed to convert image object #%d, on page #%d to png.\n",
682 i + 1, page_num + 1);
683 continue;
684 }
685
686 WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
687 "image");
688 }
689 }
690
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)691 void WriteDecodedThumbnailStream(FPDF_PAGE page,
692 const char* pdf_name,
693 int page_num) {
694 char filename[256];
695 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
696 ThumbnailDecodeType::kDecodedStream)) {
697 return;
698 }
699
700 unsigned long decoded_data_size =
701 FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
702
703 // Only continue if there actually is a thumbnail for this page
704 if (decoded_data_size == 0) {
705 fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
706 page_num + 1);
707 return;
708 }
709
710 std::vector<uint8_t> thumb_buf(decoded_data_size);
711 if (FPDFPage_GetDecodedThumbnailData(
712 page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
713 fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
714 return;
715 }
716
717 WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
718 "decoded thumbnail");
719 }
720
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)721 void WriteRawThumbnailStream(FPDF_PAGE page,
722 const char* pdf_name,
723 int page_num) {
724 char filename[256];
725 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
726 ThumbnailDecodeType::kRawStream)) {
727 return;
728 }
729
730 unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
731
732 // Only continue if there actually is a thumbnail for this page
733 if (raw_data_size == 0) {
734 fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
735 page_num + 1);
736 return;
737 }
738
739 std::vector<uint8_t> thumb_buf(raw_data_size);
740 if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
741 raw_data_size) {
742 fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
743 return;
744 }
745
746 WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
747 }
748
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)749 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
750 char filename[256];
751 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
752 ThumbnailDecodeType::kBitmap)) {
753 return;
754 }
755
756 ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
757 if (!thumb_bitmap) {
758 fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
759 page_num + 1);
760 return;
761 }
762
763 std::vector<uint8_t> png_encoding =
764 EncodeBitmapToPng(std::move(thumb_bitmap));
765 if (png_encoding.empty()) {
766 fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
767 page_num + 1);
768 return;
769 }
770
771 WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
772 "thumbnail");
773 }
774