1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "samples/pdfium_test_write_helper.h"
6
7 #include <limits.h>
8
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13
14 #include "public/cpp/fpdf_scopers.h"
15 #include "public/fpdf_annot.h"
16 #include "public/fpdf_attachment.h"
17 #include "public/fpdf_edit.h"
18 #include "public/fpdf_thumbnail.h"
19 #include "testing/fx_string_testhelpers.h"
20 #include "testing/image_diff/image_diff_png.h"
21 #include "third_party/base/notreached.h"
22
23 #ifdef PDF_ENABLE_SKIA
24 #include "third_party/skia/include/core/SkPicture.h" // nogncheck
25 #include "third_party/skia/include/core/SkStream.h" // nogncheck
26 #endif
27
28 namespace {
29
// Validates bitmap geometry before any buffer-size arithmetic is performed.
// Returns false when |stride|, |width| or |height| is negative, or when
// |stride| * |height| would not fit in an int; returns true otherwise.
bool CheckDimensions(int stride, int width, int height) {
  const bool has_negative = stride < 0 || width < 0 || height < 0;
  if (has_negative) {
    return false;
  }
  // Compare against a quotient so the product itself is never computed.
  const bool product_overflows = height > 0 && stride > INT_MAX / height;
  return !product_overflows;
}
37
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)38 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
39 if (subtype == FPDF_ANNOT_TEXT)
40 return "Text";
41 if (subtype == FPDF_ANNOT_LINK)
42 return "Link";
43 if (subtype == FPDF_ANNOT_FREETEXT)
44 return "FreeText";
45 if (subtype == FPDF_ANNOT_LINE)
46 return "Line";
47 if (subtype == FPDF_ANNOT_SQUARE)
48 return "Square";
49 if (subtype == FPDF_ANNOT_CIRCLE)
50 return "Circle";
51 if (subtype == FPDF_ANNOT_POLYGON)
52 return "Polygon";
53 if (subtype == FPDF_ANNOT_POLYLINE)
54 return "PolyLine";
55 if (subtype == FPDF_ANNOT_HIGHLIGHT)
56 return "Highlight";
57 if (subtype == FPDF_ANNOT_UNDERLINE)
58 return "Underline";
59 if (subtype == FPDF_ANNOT_SQUIGGLY)
60 return "Squiggly";
61 if (subtype == FPDF_ANNOT_STRIKEOUT)
62 return "StrikeOut";
63 if (subtype == FPDF_ANNOT_STAMP)
64 return "Stamp";
65 if (subtype == FPDF_ANNOT_CARET)
66 return "Caret";
67 if (subtype == FPDF_ANNOT_INK)
68 return "Ink";
69 if (subtype == FPDF_ANNOT_POPUP)
70 return "Popup";
71 if (subtype == FPDF_ANNOT_FILEATTACHMENT)
72 return "FileAttachment";
73 if (subtype == FPDF_ANNOT_SOUND)
74 return "Sound";
75 if (subtype == FPDF_ANNOT_MOVIE)
76 return "Movie";
77 if (subtype == FPDF_ANNOT_WIDGET)
78 return "Widget";
79 if (subtype == FPDF_ANNOT_SCREEN)
80 return "Screen";
81 if (subtype == FPDF_ANNOT_PRINTERMARK)
82 return "PrinterMark";
83 if (subtype == FPDF_ANNOT_TRAPNET)
84 return "TrapNet";
85 if (subtype == FPDF_ANNOT_WATERMARK)
86 return "Watermark";
87 if (subtype == FPDF_ANNOT_THREED)
88 return "3D";
89 if (subtype == FPDF_ANNOT_RICHMEDIA)
90 return "RichMedia";
91 if (subtype == FPDF_ANNOT_XFAWIDGET)
92 return "XFAWidget";
93 NOTREACHED();
94 return "";
95 }
96
// Appends |flag| to |*output|, inserting ", " first when |*output| already
// holds text, so repeated calls build a comma-separated list.
void AppendFlagString(const char* flag, std::string* output) {
  const char* separator = output->empty() ? "" : ", ";
  output->append(separator).append(flag);
}
102
AnnotFlagsToString(int flags)103 std::string AnnotFlagsToString(int flags) {
104 std::string str;
105 if (flags & FPDF_ANNOT_FLAG_INVISIBLE)
106 AppendFlagString("Invisible", &str);
107 if (flags & FPDF_ANNOT_FLAG_HIDDEN)
108 AppendFlagString("Hidden", &str);
109 if (flags & FPDF_ANNOT_FLAG_PRINT)
110 AppendFlagString("Print", &str);
111 if (flags & FPDF_ANNOT_FLAG_NOZOOM)
112 AppendFlagString("NoZoom", &str);
113 if (flags & FPDF_ANNOT_FLAG_NOROTATE)
114 AppendFlagString("NoRotate", &str);
115 if (flags & FPDF_ANNOT_FLAG_NOVIEW)
116 AppendFlagString("NoView", &str);
117 if (flags & FPDF_ANNOT_FLAG_READONLY)
118 AppendFlagString("ReadOnly", &str);
119 if (flags & FPDF_ANNOT_FLAG_LOCKED)
120 AppendFlagString("Locked", &str);
121 if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW)
122 AppendFlagString("ToggleNoView", &str);
123 return str;
124 }
125
PageObjectTypeToCString(int type)126 const char* PageObjectTypeToCString(int type) {
127 if (type == FPDF_PAGEOBJ_TEXT)
128 return "Text";
129 if (type == FPDF_PAGEOBJ_PATH)
130 return "Path";
131 if (type == FPDF_PAGEOBJ_IMAGE)
132 return "Image";
133 if (type == FPDF_PAGEOBJ_SHADING)
134 return "Shading";
135 if (type == FPDF_PAGEOBJ_FORM)
136 return "Form";
137 NOTREACHED();
138 return "";
139 }
140
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)141 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
142 int width,
143 int height,
144 int stride,
145 int format) {
146 std::vector<uint8_t> png;
147 switch (format) {
148 case FPDFBitmap_Unknown:
149 break;
150 case FPDFBitmap_Gray:
151 png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
152 break;
153 case FPDFBitmap_BGR:
154 png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
155 break;
156 case FPDFBitmap_BGRx:
157 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
158 /*discard_transparency=*/true);
159 break;
160 case FPDFBitmap_BGRA:
161 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
162 /*discard_transparency=*/false);
163 break;
164 default:
165 NOTREACHED();
166 }
167 return png;
168 }
169
170 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)171 int CALLBACK EnhMetaFileProc(HDC hdc,
172 HANDLETABLE* handle_table,
173 const ENHMETARECORD* record,
174 int objects_count,
175 LPARAM param) {
176 std::vector<const ENHMETARECORD*>& items =
177 *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
178 items.push_back(record);
179 return 1;
180 }
181 #endif // _WIN32
182
// Builds "<pdf_name>.<page_num>.<extension>". Names of 256 characters or more
// are rejected: an error is printed to stderr and an empty string is returned.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  constexpr size_t kFilenameLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << '.' << page_num << '.' << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
196
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". Names of 256
// characters or more are rejected: an error is printed to stderr and an empty
// string is returned.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  constexpr size_t kFilenameLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << '.' << page_num << '.' << image_num << '.'
          << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
212
213 } // namespace
214
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)215 std::string WritePpm(const char* pdf_name,
216 int num,
217 void* buffer_void,
218 int stride,
219 int width,
220 int height) {
221 if (!CheckDimensions(stride, width, height)) {
222 return "";
223 }
224
225 int out_len = width * height;
226 if (out_len > INT_MAX / 3) {
227 return "";
228 }
229
230 out_len *= 3;
231
232 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
233 if (filename.empty()) {
234 return std::string();
235 }
236 FILE* fp = fopen(filename.c_str(), "wb");
237 if (!fp) {
238 return std::string();
239 }
240
241 fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
242 // Source data is B, G, R, unused.
243 // Dest data is R, G, B.
244 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
245 std::vector<uint8_t> result(out_len);
246 for (int h = 0; h < height; ++h) {
247 const uint8_t* src_line = buffer + (stride * h);
248 uint8_t* dest_line = result.data() + (width * h * 3);
249 for (int w = 0; w < width; ++w) {
250 // R
251 dest_line[w * 3] = src_line[(w * 4) + 2];
252 // G
253 dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
254 // B
255 dest_line[(w * 3) + 2] = src_line[w * 4];
256 }
257 }
258 if (fwrite(result.data(), out_len, 1, fp) != 1) {
259 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
260 }
261
262 fclose(fp);
263 return filename;
264 }
265
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)266 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
267 std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
268 if (filename.empty()) {
269 return;
270 }
271 FILE* fp = fopen(filename.c_str(), "w");
272 if (!fp) {
273 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
274 return;
275 }
276
277 // Output in UTF32-LE.
278 uint32_t bom = 0x0000FEFF;
279 if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
280 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
281 (void)fclose(fp);
282 return;
283 }
284
285 for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
286 uint32_t c = FPDFText_GetUnicode(textpage, i);
287 if (fwrite(&c, sizeof(c), 1, fp) != 1) {
288 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
289 break;
290 }
291 }
292 (void)fclose(fp);
293 }
294
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)295 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
296 // Open the output text file.
297 std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
298 if (filename.empty()) {
299 return;
300 }
301 FILE* fp = fopen(filename.c_str(), "w");
302 if (!fp) {
303 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
304 return;
305 }
306
307 int annot_count = FPDFPage_GetAnnotCount(page);
308 fprintf(fp, "Number of annotations: %d\n\n", annot_count);
309
310 // Iterate through all annotations on this page.
311 for (int i = 0; i < annot_count; ++i) {
312 // Retrieve the annotation object and its subtype.
313 fprintf(fp, "Annotation #%d:\n", i + 1);
314 ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
315 if (!annot) {
316 fprintf(fp, "Failed to retrieve annotation!\n\n");
317 continue;
318 }
319
320 FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
321 fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
322
323 // Retrieve the annotation flags.
324 fprintf(fp, "Flags set: %s\n",
325 AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
326
327 // Retrieve the annotation's object count and object types.
328 const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
329 fprintf(fp, "Number of objects: %d\n", obj_count);
330 if (obj_count > 0) {
331 fprintf(fp, "Object types: ");
332 for (int j = 0; j < obj_count; ++j) {
333 const char* type = PageObjectTypeToCString(
334 FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
335 fprintf(fp, "%s ", type);
336 }
337 fprintf(fp, "\n");
338 }
339
340 // Retrieve the annotation's color and interior color.
341 unsigned int R;
342 unsigned int G;
343 unsigned int B;
344 unsigned int A;
345 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
346 &A)) {
347 fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
348 } else {
349 fprintf(fp, "Failed to retrieve color.\n");
350 }
351 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
352 &G, &B, &A)) {
353 fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
354 } else {
355 fprintf(fp, "Failed to retrieve interior color.\n");
356 }
357
358 // Retrieve the annotation's contents and author.
359 static constexpr char kContentsKey[] = "Contents";
360 static constexpr char kAuthorKey[] = "T";
361 unsigned long length_bytes =
362 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
363 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
364 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
365 length_bytes);
366 fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
367 length_bytes =
368 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
369 buf = GetFPDFWideStringBuffer(length_bytes);
370 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
371 fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
372
373 // Retrieve the annotation's quadpoints if it is a markup annotation.
374 if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
375 size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
376 fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
377
378 // Iterate through all quadpoints of the current annotation
379 for (size_t j = 0; j < qp_count; ++j) {
380 FS_QUADPOINTSF quadpoints;
381 if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
382 fprintf(fp,
383 "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
384 "(%.3f, %.3f), (%.3f, %.3f)\n",
385 j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
386 quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
387 quadpoints.y4);
388 } else {
389 fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
390 }
391 }
392 }
393
394 // Retrieve the annotation's rectangle coordinates.
395 FS_RECTF rect;
396 if (FPDFAnnot_GetRect(annot.get(), &rect)) {
397 fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
398 rect.left, rect.bottom, rect.right, rect.top);
399 } else {
400 fprintf(fp, "Failed to retrieve annotation rectangle.\n");
401 }
402 }
403
404 (void)fclose(fp);
405 }
406
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)407 std::string WritePng(const char* pdf_name,
408 int num,
409 void* buffer,
410 int stride,
411 int width,
412 int height) {
413 if (!CheckDimensions(stride, width, height)) {
414 return "";
415 }
416
417 auto input =
418 pdfium::make_span(static_cast<uint8_t*>(buffer), stride * height);
419 std::vector<uint8_t> png_encoding =
420 EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
421 if (png_encoding.empty()) {
422 fprintf(stderr, "Failed to convert bitmap to PNG\n");
423 return "";
424 }
425
426 std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
427 if (filename.empty()) {
428 return std::string();
429 }
430 FILE* fp = fopen(filename.c_str(), "wb");
431 if (!fp) {
432 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
433 return std::string();
434 }
435
436 size_t bytes_written =
437 fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
438 if (bytes_written != png_encoding.size()) {
439 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
440 }
441
442 (void)fclose(fp);
443 return filename;
444 }
445
446 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)447 std::string WriteBmp(const char* pdf_name,
448 int num,
449 void* buffer,
450 int stride,
451 int width,
452 int height) {
453 if (!CheckDimensions(stride, width, height)) {
454 return std::string();
455 }
456
457 int out_len = stride * height;
458 if (out_len > INT_MAX / 3) {
459 return std::string();
460 }
461
462 std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
463 if (filename.empty()) {
464 return std::string();
465 }
466 FILE* fp = fopen(filename.c_str(), "wb");
467 if (!fp) {
468 return std::string();
469 }
470
471 BITMAPINFO bmi = {};
472 bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
473 bmi.bmiHeader.biWidth = width;
474 bmi.bmiHeader.biHeight = -height; // top-down image
475 bmi.bmiHeader.biPlanes = 1;
476 bmi.bmiHeader.biBitCount = 32;
477 bmi.bmiHeader.biCompression = BI_RGB;
478 bmi.bmiHeader.biSizeImage = 0;
479
480 BITMAPFILEHEADER file_header = {};
481 file_header.bfType = 0x4d42;
482 file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
483 file_header.bfOffBits = file_header.bfSize - out_len;
484
485 if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
486 fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
487 fwrite(buffer, out_len, 1, fp) != 1) {
488 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
489 }
490 fclose(fp);
491 return filename;
492 }
493
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)494 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
495 std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
496 if (filename.empty()) {
497 return;
498 }
499
500 HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
501
502 int width = static_cast<int>(FPDF_GetPageWidthF(page));
503 int height = static_cast<int>(FPDF_GetPageHeightF(page));
504 HRGN rgn = CreateRectRgn(0, 0, width, height);
505 SelectClipRgn(dc, rgn);
506 DeleteObject(rgn);
507
508 SelectObject(dc, GetStockObject(NULL_PEN));
509 SelectObject(dc, GetStockObject(WHITE_BRUSH));
510 // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
511 Rectangle(dc, 0, 0, width + 1, height + 1);
512
513 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
514
515 DeleteEnhMetaFile(CloseEnhMetaFile(dc));
516 }
517
WritePS(FPDF_PAGE page,const char * pdf_name,int num)518 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
519 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
520 if (filename.empty()) {
521 return;
522 }
523 FILE* fp = fopen(filename.c_str(), "wb");
524 if (!fp)
525 return;
526
527 HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
528
529 int width = static_cast<int>(FPDF_GetPageWidthF(page));
530 int height = static_cast<int>(FPDF_GetPageHeightF(page));
531 FPDF_RenderPage(dc, page, 0, 0, width, height, 0, FPDF_ANNOT | FPDF_PRINTING);
532
533 HENHMETAFILE emf = CloseEnhMetaFile(dc);
534 std::vector<const ENHMETARECORD*> items;
535 EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
536 for (const ENHMETARECORD* record : items) {
537 if (record->iType != EMR_GDICOMMENT)
538 continue;
539
540 const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
541 const char* data = reinterpret_cast<const char*>(comment->Data);
542 uint16_t size = *reinterpret_cast<const uint16_t*>(data);
543 if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
544 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
545 break;
546 }
547 }
548 fclose(fp);
549 DeleteEnhMetaFile(emf);
550 }
551 #endif // _WIN32
552
553 #ifdef PDF_ENABLE_SKIA
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)554 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
555 std::string filename = GeneratePageOutputFilename(pdf_name, num, "skp");
556 if (filename.empty()) {
557 return filename;
558 }
559 SkFILEWStream wStream(filename.c_str());
560 picture.serialize(&wStream);
561 return filename;
562 }
563 #endif
564
// Identifies which form of a page thumbnail an output file holds; it selects
// the filename pattern used by GetThumbnailFilename().
enum class ThumbnailDecodeType { kBitmap, kRawStream, kDecodedStream };

// Formats the output filename for a thumbnail of page |page_num| of
// |pdf_name| into |name_buf|, which holds |name_buf_size| bytes:
//   kBitmap        -> "<pdf_name>.thumbnail.<page_num>.png"
//   kDecodedStream -> "<pdf_name>.thumbnail.decoded.<page_num>.bin"
//   kRawStream     -> "<pdf_name>.thumbnail.raw.<page_num>.bin"
// Returns true on success. Returns false, after printing a message to stderr,
// when no usable buffer is supplied, |decode_type| is not one of the values
// above, formatting fails, or the name does not fit in the buffer.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  if (!name_buf || name_buf_size == 0) {
    fprintf(stderr, "No buffer available for the thumbnail filename.\n");
    return false;
  }

  // Stays null for an out-of-range |decode_type| so that snprintf() is never
  // handed an uninitialized format pointer.
  const char* format = nullptr;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      format = "%s.thumbnail.%d.png";
      break;
    case ThumbnailDecodeType::kDecodedStream:
      format = "%s.thumbnail.decoded.%d.bin";
      break;
    case ThumbnailDecodeType::kRawStream:
      format = "%s.thumbnail.raw.%d.bin";
      break;
  }
  if (!format) {
    fprintf(stderr, "Unknown thumbnail decode type.\n");
    return false;
  }

  int chars_formatted =
      snprintf(name_buf, name_buf_size, format, pdf_name, page_num);
  if (chars_formatted < 0) {
    // The buffer contents are unspecified after an encoding error, so the
    // message must not print |name_buf|.
    fprintf(stderr, "Failed to format thumbnail filename for %s.\n", pdf_name);
    return false;
  }
  if (static_cast<size_t>(chars_formatted) >= name_buf_size) {
    // On truncation |name_buf| still holds a terminated prefix of the name.
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
595
// Writes |buflen| bytes from |buf| to a new binary file named |filename|,
// replacing any existing file. |filetype| is only used in the status messages
// printed to stderr. A |buflen| of zero creates an empty file. Success is
// reported only when every byte was written and the file closed cleanly.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  // Skip fwrite() for empty buffers; |buf| may be null in that case.
  const size_t bytes_written = buflen > 0 ? fwrite(buf, 1, buflen, fp) : 0;
  // Buffered data is flushed by fclose(), so a failure there means the file
  // is incomplete even if fwrite() accepted every byte.
  const bool closed_ok = fclose(fp) == 0;
  if (bytes_written == buflen && closed_ok)
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  else
    fprintf(stderr, "Failed to write to %s.\n", filename);
}
613
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)614 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
615 std::vector<uint8_t> png_encoding;
616 int format = FPDFBitmap_GetFormat(bitmap.get());
617 if (format == FPDFBitmap_Unknown)
618 return png_encoding;
619
620 int width = FPDFBitmap_GetWidth(bitmap.get());
621 int height = FPDFBitmap_GetHeight(bitmap.get());
622 int stride = FPDFBitmap_GetStride(bitmap.get());
623 if (!CheckDimensions(stride, width, height))
624 return png_encoding;
625
626 auto input = pdfium::make_span(
627 static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
628 stride * height);
629
630 png_encoding = EncodePng(input, width, height, stride, format);
631 return png_encoding;
632 }
633
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)634 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
635 for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
636 FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
637
638 // Retrieve the attachment file name.
639 std::string attachment_name;
640 unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
641 if (length_bytes) {
642 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
643 unsigned long actual_length_bytes =
644 FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
645 if (actual_length_bytes == length_bytes)
646 attachment_name = GetPlatformString(buf.data());
647 }
648 if (attachment_name.empty()) {
649 fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
650 continue;
651 }
652
653 // Calculate the full attachment file name.
654 char save_name[256];
655 int chars_formatted =
656 snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
657 attachment_name.c_str());
658 if (chars_formatted < 0 ||
659 static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
660 fprintf(stderr, "Filename %s is too long.\n", save_name);
661 continue;
662 }
663
664 // Retrieve the attachment.
665 if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
666 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
667 attachment_name.c_str());
668 continue;
669 }
670
671 std::vector<char> data_buf(length_bytes);
672 if (length_bytes) {
673 unsigned long actual_length_bytes;
674 if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
675 &actual_length_bytes)) {
676 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
677 attachment_name.c_str());
678 continue;
679 }
680 }
681
682 // Write the attachment file. Since a PDF document could have 0-byte files
683 // as attachments, we should allow saving the 0-byte attachments to files.
684 WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
685 }
686 }
687
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)688 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
689 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
690 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
691 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
692 continue;
693 }
694
695 ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
696 if (!bitmap) {
697 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
698 i + 1, page_num + 1);
699 continue;
700 }
701
702 std::string filename =
703 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
704 if (filename.empty()) {
705 continue;
706 }
707
708 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
709 if (png_encoding.empty()) {
710 fprintf(stderr,
711 "Failed to convert image object #%d, on page #%d to png.\n",
712 i + 1, page_num + 1);
713 continue;
714 }
715
716 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
717 filename.c_str(), "image");
718 }
719 }
720
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)721 void WriteRenderedImages(FPDF_DOCUMENT doc,
722 FPDF_PAGE page,
723 const char* pdf_name,
724 int page_num) {
725 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
726 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
727 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
728 continue;
729 }
730
731 ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
732 if (!bitmap) {
733 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
734 i + 1, page_num + 1);
735 continue;
736 }
737
738 std::string filename =
739 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
740 if (filename.empty()) {
741 continue;
742 }
743
744 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
745 if (png_encoding.empty()) {
746 fprintf(stderr,
747 "Failed to convert image object #%d, on page #%d to png.\n",
748 i + 1, page_num + 1);
749 continue;
750 }
751
752 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
753 filename.c_str(), "image");
754 }
755 }
756
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)757 void WriteDecodedThumbnailStream(FPDF_PAGE page,
758 const char* pdf_name,
759 int page_num) {
760 char filename[256];
761 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
762 ThumbnailDecodeType::kDecodedStream)) {
763 return;
764 }
765
766 unsigned long decoded_data_size =
767 FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
768
769 // Only continue if there actually is a thumbnail for this page
770 if (decoded_data_size == 0) {
771 fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
772 page_num + 1);
773 return;
774 }
775
776 std::vector<uint8_t> thumb_buf(decoded_data_size);
777 if (FPDFPage_GetDecodedThumbnailData(
778 page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
779 fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
780 return;
781 }
782
783 WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
784 "decoded thumbnail");
785 }
786
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)787 void WriteRawThumbnailStream(FPDF_PAGE page,
788 const char* pdf_name,
789 int page_num) {
790 char filename[256];
791 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
792 ThumbnailDecodeType::kRawStream)) {
793 return;
794 }
795
796 unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
797
798 // Only continue if there actually is a thumbnail for this page
799 if (raw_data_size == 0) {
800 fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
801 page_num + 1);
802 return;
803 }
804
805 std::vector<uint8_t> thumb_buf(raw_data_size);
806 if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
807 raw_data_size) {
808 fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
809 return;
810 }
811
812 WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
813 }
814
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)815 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
816 char filename[256];
817 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
818 ThumbnailDecodeType::kBitmap)) {
819 return;
820 }
821
822 ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
823 if (!thumb_bitmap) {
824 fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
825 page_num + 1);
826 return;
827 }
828
829 std::vector<uint8_t> png_encoding =
830 EncodeBitmapToPng(std::move(thumb_bitmap));
831 if (png_encoding.empty()) {
832 fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
833 page_num + 1);
834 return;
835 }
836
837 WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
838 "thumbnail");
839 }
840