1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "testing/helpers/write.h"
6
7 #include <limits.h>
8
9 #include <sstream>
10 #include <string>
11 #include <utility>
12 #include <vector>
13
14 #include "core/fxcrt/check.h"
15 #include "core/fxcrt/notreached.h"
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_annot.h"
18 #include "public/fpdf_attachment.h"
19 #include "public/fpdf_edit.h"
20 #include "public/fpdf_thumbnail.h"
21 #include "testing/fx_string_testhelpers.h"
22 #include "testing/image_diff/image_diff_png.h"
23
24 #ifdef PDF_ENABLE_SKIA
25 #include "third_party/skia/include/core/SkPicture.h" // nogncheck
26 #include "third_party/skia/include/core/SkSerialProcs.h" // nogncheck
27 #include "third_party/skia/include/core/SkStream.h" // nogncheck
28 #include "third_party/skia/include/encode/SkPngEncoder.h" // nogncheck
29 #endif
30
31 namespace {
32
// Returns true when `stride`, `width` and `height` are all non-negative and
// the product `stride * height` fits in an int.
bool CheckDimensions(int stride, int width, int height) {
  const bool has_negative_value = stride < 0 || width < 0 || height < 0;
  if (has_negative_value) {
    return false;
  }
  // With a zero height the product is zero; returning here also keeps the
  // division below away from a zero divisor.
  if (height == 0) {
    return true;
  }
  return stride <= INT_MAX / height;
}
42
AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype)43 const char* AnnotSubtypeToCString(FPDF_ANNOTATION_SUBTYPE subtype) {
44 if (subtype == FPDF_ANNOT_TEXT) {
45 return "Text";
46 }
47 if (subtype == FPDF_ANNOT_LINK) {
48 return "Link";
49 }
50 if (subtype == FPDF_ANNOT_FREETEXT) {
51 return "FreeText";
52 }
53 if (subtype == FPDF_ANNOT_LINE) {
54 return "Line";
55 }
56 if (subtype == FPDF_ANNOT_SQUARE) {
57 return "Square";
58 }
59 if (subtype == FPDF_ANNOT_CIRCLE) {
60 return "Circle";
61 }
62 if (subtype == FPDF_ANNOT_POLYGON) {
63 return "Polygon";
64 }
65 if (subtype == FPDF_ANNOT_POLYLINE) {
66 return "PolyLine";
67 }
68 if (subtype == FPDF_ANNOT_HIGHLIGHT) {
69 return "Highlight";
70 }
71 if (subtype == FPDF_ANNOT_UNDERLINE) {
72 return "Underline";
73 }
74 if (subtype == FPDF_ANNOT_SQUIGGLY) {
75 return "Squiggly";
76 }
77 if (subtype == FPDF_ANNOT_STRIKEOUT) {
78 return "StrikeOut";
79 }
80 if (subtype == FPDF_ANNOT_STAMP) {
81 return "Stamp";
82 }
83 if (subtype == FPDF_ANNOT_CARET) {
84 return "Caret";
85 }
86 if (subtype == FPDF_ANNOT_INK) {
87 return "Ink";
88 }
89 if (subtype == FPDF_ANNOT_POPUP) {
90 return "Popup";
91 }
92 if (subtype == FPDF_ANNOT_FILEATTACHMENT) {
93 return "FileAttachment";
94 }
95 if (subtype == FPDF_ANNOT_SOUND) {
96 return "Sound";
97 }
98 if (subtype == FPDF_ANNOT_MOVIE) {
99 return "Movie";
100 }
101 if (subtype == FPDF_ANNOT_WIDGET) {
102 return "Widget";
103 }
104 if (subtype == FPDF_ANNOT_SCREEN) {
105 return "Screen";
106 }
107 if (subtype == FPDF_ANNOT_PRINTERMARK) {
108 return "PrinterMark";
109 }
110 if (subtype == FPDF_ANNOT_TRAPNET) {
111 return "TrapNet";
112 }
113 if (subtype == FPDF_ANNOT_WATERMARK) {
114 return "Watermark";
115 }
116 if (subtype == FPDF_ANNOT_THREED) {
117 return "3D";
118 }
119 if (subtype == FPDF_ANNOT_RICHMEDIA) {
120 return "RichMedia";
121 }
122 if (subtype == FPDF_ANNOT_XFAWIDGET) {
123 return "XFAWidget";
124 }
125 NOTREACHED_NORETURN();
126 }
127
// Appends `flag` to `*output`, preceded by ", " when `*output` already holds
// text.
void AppendFlagString(const char* flag, std::string* output) {
  const bool needs_separator = !output->empty();
  if (needs_separator) {
    output->append(", ");
  }
  output->append(flag);
}
134
AnnotFlagsToString(int flags)135 std::string AnnotFlagsToString(int flags) {
136 std::string str;
137 if (flags & FPDF_ANNOT_FLAG_INVISIBLE) {
138 AppendFlagString("Invisible", &str);
139 }
140 if (flags & FPDF_ANNOT_FLAG_HIDDEN) {
141 AppendFlagString("Hidden", &str);
142 }
143 if (flags & FPDF_ANNOT_FLAG_PRINT) {
144 AppendFlagString("Print", &str);
145 }
146 if (flags & FPDF_ANNOT_FLAG_NOZOOM) {
147 AppendFlagString("NoZoom", &str);
148 }
149 if (flags & FPDF_ANNOT_FLAG_NOROTATE) {
150 AppendFlagString("NoRotate", &str);
151 }
152 if (flags & FPDF_ANNOT_FLAG_NOVIEW) {
153 AppendFlagString("NoView", &str);
154 }
155 if (flags & FPDF_ANNOT_FLAG_READONLY) {
156 AppendFlagString("ReadOnly", &str);
157 }
158 if (flags & FPDF_ANNOT_FLAG_LOCKED) {
159 AppendFlagString("Locked", &str);
160 }
161 if (flags & FPDF_ANNOT_FLAG_TOGGLENOVIEW) {
162 AppendFlagString("ToggleNoView", &str);
163 }
164 return str;
165 }
166
PageObjectTypeToCString(int type)167 const char* PageObjectTypeToCString(int type) {
168 if (type == FPDF_PAGEOBJ_TEXT) {
169 return "Text";
170 }
171 if (type == FPDF_PAGEOBJ_PATH) {
172 return "Path";
173 }
174 if (type == FPDF_PAGEOBJ_IMAGE) {
175 return "Image";
176 }
177 if (type == FPDF_PAGEOBJ_SHADING) {
178 return "Shading";
179 }
180 if (type == FPDF_PAGEOBJ_FORM) {
181 return "Form";
182 }
183 NOTREACHED_NORETURN();
184 }
185
EncodePng(pdfium::span<const uint8_t> input,int width,int height,int stride,int format)186 std::vector<uint8_t> EncodePng(pdfium::span<const uint8_t> input,
187 int width,
188 int height,
189 int stride,
190 int format) {
191 std::vector<uint8_t> png;
192 switch (format) {
193 case FPDFBitmap_Unknown:
194 break;
195 case FPDFBitmap_Gray:
196 png = image_diff_png::EncodeGrayPNG(input, width, height, stride);
197 break;
198 case FPDFBitmap_BGR:
199 png = image_diff_png::EncodeBGRPNG(input, width, height, stride);
200 break;
201 case FPDFBitmap_BGRx:
202 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
203 /*discard_transparency=*/true);
204 break;
205 case FPDFBitmap_BGRA:
206 png = image_diff_png::EncodeBGRAPNG(input, width, height, stride,
207 /*discard_transparency=*/false);
208 break;
209 default:
210 NOTREACHED_NORETURN();
211 }
212 return png;
213 }
214
215 #ifdef _WIN32
EnhMetaFileProc(HDC hdc,HANDLETABLE * handle_table,const ENHMETARECORD * record,int objects_count,LPARAM param)216 int CALLBACK EnhMetaFileProc(HDC hdc,
217 HANDLETABLE* handle_table,
218 const ENHMETARECORD* record,
219 int objects_count,
220 LPARAM param) {
221 std::vector<const ENHMETARECORD*>& items =
222 *reinterpret_cast<std::vector<const ENHMETARECORD*>*>(param);
223 items.push_back(record);
224 return 1;
225 }
226 #endif // _WIN32
227
// Builds "<pdf_name>.<page_num>.<extension>". Returns an empty string, after
// reporting on stderr, when the result is 256 characters or longer.
std::string GeneratePageOutputFilename(const char* pdf_name,
                                       int page_num,
                                       const char* extension) {
  constexpr size_t kFilenameSizeLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameSizeLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s is too long\n", result.c_str());
  return std::string();
}
241
// Builds "<pdf_name>.<page_num>.<image_num>.<extension>". Returns an empty
// string, after reporting on stderr, when the result is 256 characters or
// longer.
std::string GenerateImageOutputFilename(const char* pdf_name,
                                        int page_num,
                                        int image_num,
                                        const char* extension) {
  constexpr size_t kFilenameSizeLimit = 256;

  std::ostringstream builder;
  builder << pdf_name << "." << page_num << "." << image_num << "."
          << extension;
  std::string result = builder.str();
  if (result.size() < kFilenameSizeLimit) {
    return result;
  }

  fprintf(stderr, "Filename %s for saving image is too long.\n",
          result.c_str());
  return std::string();
}
257
258 } // namespace
259
WritePpm(const char * pdf_name,int num,void * buffer_void,int stride,int width,int height)260 std::string WritePpm(const char* pdf_name,
261 int num,
262 void* buffer_void,
263 int stride,
264 int width,
265 int height) {
266 if (!CheckDimensions(stride, width, height)) {
267 return "";
268 }
269
270 int out_len = width * height;
271 if (out_len > INT_MAX / 3) {
272 return "";
273 }
274
275 out_len *= 3;
276
277 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ppm");
278 if (filename.empty()) {
279 return std::string();
280 }
281 FILE* fp = fopen(filename.c_str(), "wb");
282 if (!fp) {
283 return std::string();
284 }
285
286 fprintf(fp, "P6\n# PDF test render\n%d %d\n255\n", width, height);
287 // Source data is B, G, R, unused.
288 // Dest data is R, G, B.
289 const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buffer_void);
290 std::vector<uint8_t> result(out_len);
291 for (int h = 0; h < height; ++h) {
292 const uint8_t* src_line = buffer + (stride * h);
293 uint8_t* dest_line = result.data() + (width * h * 3);
294 for (int w = 0; w < width; ++w) {
295 // R
296 dest_line[w * 3] = src_line[(w * 4) + 2];
297 // G
298 dest_line[(w * 3) + 1] = src_line[(w * 4) + 1];
299 // B
300 dest_line[(w * 3) + 2] = src_line[w * 4];
301 }
302 }
303 if (fwrite(result.data(), out_len, 1, fp) != 1) {
304 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
305 }
306
307 fclose(fp);
308 return filename;
309 }
310
WriteText(FPDF_TEXTPAGE textpage,const char * pdf_name,int num)311 void WriteText(FPDF_TEXTPAGE textpage, const char* pdf_name, int num) {
312 std::string filename = GeneratePageOutputFilename(pdf_name, num, "txt");
313 if (filename.empty()) {
314 return;
315 }
316 FILE* fp = fopen(filename.c_str(), "w");
317 if (!fp) {
318 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
319 return;
320 }
321
322 // Output in UTF32-LE.
323 uint32_t bom = 0x0000FEFF;
324 if (fwrite(&bom, sizeof(bom), 1, fp) != 1) {
325 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
326 (void)fclose(fp);
327 return;
328 }
329
330 for (int i = 0; i < FPDFText_CountChars(textpage); i++) {
331 uint32_t c = FPDFText_GetUnicode(textpage, i);
332 if (fwrite(&c, sizeof(c), 1, fp) != 1) {
333 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
334 break;
335 }
336 }
337 (void)fclose(fp);
338 }
339
WriteAnnot(FPDF_PAGE page,const char * pdf_name,int num)340 void WriteAnnot(FPDF_PAGE page, const char* pdf_name, int num) {
341 // Open the output text file.
342 std::string filename = GeneratePageOutputFilename(pdf_name, num, "annot.txt");
343 if (filename.empty()) {
344 return;
345 }
346 FILE* fp = fopen(filename.c_str(), "w");
347 if (!fp) {
348 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
349 return;
350 }
351
352 int annot_count = FPDFPage_GetAnnotCount(page);
353 fprintf(fp, "Number of annotations: %d\n\n", annot_count);
354
355 // Iterate through all annotations on this page.
356 for (int i = 0; i < annot_count; ++i) {
357 // Retrieve the annotation object and its subtype.
358 fprintf(fp, "Annotation #%d:\n", i + 1);
359 ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
360 if (!annot) {
361 fprintf(fp, "Failed to retrieve annotation!\n\n");
362 continue;
363 }
364
365 FPDF_ANNOTATION_SUBTYPE subtype = FPDFAnnot_GetSubtype(annot.get());
366 fprintf(fp, "Subtype: %s\n", AnnotSubtypeToCString(subtype));
367
368 // Retrieve the annotation flags.
369 fprintf(fp, "Flags set: %s\n",
370 AnnotFlagsToString(FPDFAnnot_GetFlags(annot.get())).c_str());
371
372 // Retrieve the annotation's object count and object types.
373 const int obj_count = FPDFAnnot_GetObjectCount(annot.get());
374 fprintf(fp, "Number of objects: %d\n", obj_count);
375 if (obj_count > 0) {
376 fprintf(fp, "Object types: ");
377 for (int j = 0; j < obj_count; ++j) {
378 const char* type = PageObjectTypeToCString(
379 FPDFPageObj_GetType(FPDFAnnot_GetObject(annot.get(), j)));
380 fprintf(fp, "%s ", type);
381 }
382 fprintf(fp, "\n");
383 }
384
385 // Retrieve the annotation's color and interior color.
386 unsigned int R;
387 unsigned int G;
388 unsigned int B;
389 unsigned int A;
390 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_Color, &R, &G, &B,
391 &A)) {
392 fprintf(fp, "Color in RGBA: %d %d %d %d\n", R, G, B, A);
393 } else {
394 fprintf(fp, "Failed to retrieve color.\n");
395 }
396 if (FPDFAnnot_GetColor(annot.get(), FPDFANNOT_COLORTYPE_InteriorColor, &R,
397 &G, &B, &A)) {
398 fprintf(fp, "Interior color in RGBA: %d %d %d %d\n", R, G, B, A);
399 } else {
400 fprintf(fp, "Failed to retrieve interior color.\n");
401 }
402
403 // Retrieve the annotation's contents and author.
404 static constexpr char kContentsKey[] = "Contents";
405 static constexpr char kAuthorKey[] = "T";
406 unsigned long length_bytes =
407 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, nullptr, 0);
408 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
409 FPDFAnnot_GetStringValue(annot.get(), kContentsKey, buf.data(),
410 length_bytes);
411 fprintf(fp, "Content: %ls\n", GetPlatformWString(buf.data()).c_str());
412 length_bytes =
413 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, nullptr, 0);
414 buf = GetFPDFWideStringBuffer(length_bytes);
415 FPDFAnnot_GetStringValue(annot.get(), kAuthorKey, buf.data(), length_bytes);
416 fprintf(fp, "Author: %ls\n", GetPlatformWString(buf.data()).c_str());
417
418 // Retrieve the annotation's quadpoints if it is a markup annotation.
419 if (FPDFAnnot_HasAttachmentPoints(annot.get())) {
420 size_t qp_count = FPDFAnnot_CountAttachmentPoints(annot.get());
421 fprintf(fp, "Number of quadpoints sets: %zu\n", qp_count);
422
423 // Iterate through all quadpoints of the current annotation
424 for (size_t j = 0; j < qp_count; ++j) {
425 FS_QUADPOINTSF quadpoints;
426 if (FPDFAnnot_GetAttachmentPoints(annot.get(), j, &quadpoints)) {
427 fprintf(fp,
428 "Quadpoints set #%zu: (%.3f, %.3f), (%.3f, %.3f), "
429 "(%.3f, %.3f), (%.3f, %.3f)\n",
430 j + 1, quadpoints.x1, quadpoints.y1, quadpoints.x2,
431 quadpoints.y2, quadpoints.x3, quadpoints.y3, quadpoints.x4,
432 quadpoints.y4);
433 } else {
434 fprintf(fp, "Failed to retrieve quadpoints set #%zu.\n", j + 1);
435 }
436 }
437 }
438
439 // Retrieve the annotation's rectangle coordinates.
440 FS_RECTF rect;
441 if (FPDFAnnot_GetRect(annot.get(), &rect)) {
442 fprintf(fp, "Rectangle: l - %.3f, b - %.3f, r - %.3f, t - %.3f\n\n",
443 rect.left, rect.bottom, rect.right, rect.top);
444 } else {
445 fprintf(fp, "Failed to retrieve annotation rectangle.\n");
446 }
447 }
448
449 (void)fclose(fp);
450 }
451
WritePng(const char * pdf_name,int num,void * buffer,int stride,int width,int height)452 std::string WritePng(const char* pdf_name,
453 int num,
454 void* buffer,
455 int stride,
456 int width,
457 int height) {
458 if (!CheckDimensions(stride, width, height)) {
459 return "";
460 }
461
462 auto input = pdfium::make_span(static_cast<uint8_t*>(buffer),
463 static_cast<size_t>(stride) * height);
464 std::vector<uint8_t> png_encoding =
465 EncodePng(input, width, height, stride, FPDFBitmap_BGRA);
466 if (png_encoding.empty()) {
467 fprintf(stderr, "Failed to convert bitmap to PNG\n");
468 return "";
469 }
470
471 std::string filename = GeneratePageOutputFilename(pdf_name, num, "png");
472 if (filename.empty()) {
473 return std::string();
474 }
475 FILE* fp = fopen(filename.c_str(), "wb");
476 if (!fp) {
477 fprintf(stderr, "Failed to open %s for output\n", filename.c_str());
478 return std::string();
479 }
480
481 size_t bytes_written =
482 fwrite(&png_encoding.front(), 1, png_encoding.size(), fp);
483 if (bytes_written != png_encoding.size()) {
484 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
485 }
486
487 (void)fclose(fp);
488 return filename;
489 }
490
491 #ifdef _WIN32
WriteBmp(const char * pdf_name,int num,void * buffer,int stride,int width,int height)492 std::string WriteBmp(const char* pdf_name,
493 int num,
494 void* buffer,
495 int stride,
496 int width,
497 int height) {
498 if (!CheckDimensions(stride, width, height)) {
499 return std::string();
500 }
501
502 int out_len = stride * height;
503 if (out_len > INT_MAX / 3) {
504 return std::string();
505 }
506
507 std::string filename = GeneratePageOutputFilename(pdf_name, num, "bmp");
508 if (filename.empty()) {
509 return std::string();
510 }
511 FILE* fp = fopen(filename.c_str(), "wb");
512 if (!fp) {
513 return std::string();
514 }
515
516 BITMAPINFO bmi = {};
517 bmi.bmiHeader.biSize = sizeof(bmi) - sizeof(RGBQUAD);
518 bmi.bmiHeader.biWidth = width;
519 bmi.bmiHeader.biHeight = -height; // top-down image
520 bmi.bmiHeader.biPlanes = 1;
521 bmi.bmiHeader.biBitCount = 32;
522 bmi.bmiHeader.biCompression = BI_RGB;
523 bmi.bmiHeader.biSizeImage = 0;
524
525 BITMAPFILEHEADER file_header = {};
526 file_header.bfType = 0x4d42;
527 file_header.bfSize = sizeof(file_header) + bmi.bmiHeader.biSize + out_len;
528 file_header.bfOffBits = file_header.bfSize - out_len;
529
530 if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1 ||
531 fwrite(&bmi, bmi.bmiHeader.biSize, 1, fp) != 1 ||
532 fwrite(buffer, out_len, 1, fp) != 1) {
533 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
534 }
535 fclose(fp);
536 return filename;
537 }
538
WriteEmf(FPDF_PAGE page,const char * pdf_name,int num)539 void WriteEmf(FPDF_PAGE page, const char* pdf_name, int num) {
540 std::string filename = GeneratePageOutputFilename(pdf_name, num, "emf");
541 if (filename.empty()) {
542 return;
543 }
544
545 HDC dc = CreateEnhMetaFileA(nullptr, filename.c_str(), nullptr, nullptr);
546
547 int width = static_cast<int>(FPDF_GetPageWidthF(page));
548 int height = static_cast<int>(FPDF_GetPageHeightF(page));
549 HRGN rgn = CreateRectRgn(0, 0, width, height);
550 SelectClipRgn(dc, rgn);
551 DeleteObject(rgn);
552
553 SelectObject(dc, GetStockObject(NULL_PEN));
554 SelectObject(dc, GetStockObject(WHITE_BRUSH));
555 // If a PS_NULL pen is used, the dimensions of the rectangle are 1 pixel less.
556 Rectangle(dc, 0, 0, width + 1, height + 1);
557
558 CHECK(FPDF_RenderPage(dc, page, 0, 0, width, height, 0,
559 FPDF_ANNOT | FPDF_PRINTING));
560
561 DeleteEnhMetaFile(CloseEnhMetaFile(dc));
562 }
563
WritePS(FPDF_PAGE page,const char * pdf_name,int num)564 void WritePS(FPDF_PAGE page, const char* pdf_name, int num) {
565 std::string filename = GeneratePageOutputFilename(pdf_name, num, "ps");
566 if (filename.empty()) {
567 return;
568 }
569 FILE* fp = fopen(filename.c_str(), "wb");
570 if (!fp) {
571 return;
572 }
573
574 HDC dc = CreateEnhMetaFileA(nullptr, nullptr, nullptr, nullptr);
575
576 int width = static_cast<int>(FPDF_GetPageWidthF(page));
577 int height = static_cast<int>(FPDF_GetPageHeightF(page));
578 CHECK(FPDF_RenderPage(dc, page, 0, 0, width, height, 0,
579 FPDF_ANNOT | FPDF_PRINTING));
580
581 HENHMETAFILE emf = CloseEnhMetaFile(dc);
582 std::vector<const ENHMETARECORD*> items;
583 EnumEnhMetaFile(nullptr, emf, &EnhMetaFileProc, &items, nullptr);
584 for (const ENHMETARECORD* record : items) {
585 if (record->iType != EMR_GDICOMMENT) {
586 continue;
587 }
588
589 const auto* comment = reinterpret_cast<const EMRGDICOMMENT*>(record);
590 const char* data = reinterpret_cast<const char*>(comment->Data);
591 uint16_t size = *reinterpret_cast<const uint16_t*>(data);
592 if (fwrite(data + sizeof(uint16_t), size, 1, fp) != 1) {
593 fprintf(stderr, "Failed to write to %s\n", filename.c_str());
594 break;
595 }
596 }
597 fclose(fp);
598 DeleteEnhMetaFile(emf);
599 }
600 #endif // _WIN32
601
602 #ifdef PDF_ENABLE_SKIA
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension)603 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
604 int num,
605 const std::string& extension) {
606 std::string discarded_filename;
607 return WriteToSkWStream(pdf_name, num, extension, discarded_filename);
608 }
609
WriteToSkWStream(const std::string & pdf_name,int num,const std::string & extension,std::string & filename)610 std::unique_ptr<SkWStream> WriteToSkWStream(const std::string& pdf_name,
611 int num,
612 const std::string& extension,
613 std::string& filename) {
614 filename =
615 GeneratePageOutputFilename(pdf_name.c_str(), num, extension.c_str());
616 if (filename.empty()) {
617 return nullptr;
618 }
619
620 auto stream = std::make_unique<SkFILEWStream>(filename.c_str());
621 if (!stream->isValid()) {
622 return nullptr;
623 }
624
625 return stream;
626 }
627
WriteSkp(const char * pdf_name,int num,const SkPicture & picture)628 std::string WriteSkp(const char* pdf_name, int num, const SkPicture& picture) {
629 std::string filename;
630 std::unique_ptr<SkWStream> stream =
631 WriteToSkWStream(pdf_name, num, "skp", filename);
632 if (!stream) {
633 return "";
634 }
635 SkSerialProcs procs;
636 procs.fImageProc = [](SkImage* img, void*) -> sk_sp<SkData> {
637 return SkPngEncoder::Encode(nullptr, img, SkPngEncoder::Options{});
638 };
639
640 picture.serialize(stream.get(), &procs);
641 return filename;
642 }
643 #endif // PDF_ENABLE_SKIA
644
// Selects which form of a page thumbnail a filename is generated for.
enum class ThumbnailDecodeType {
  kBitmap,         // "<pdf>.thumbnail.<page>.png"
  kRawStream,      // "<pdf>.thumbnail.raw.<page>.bin"
  kDecodedStream,  // "<pdf>.thumbnail.decoded.<page>.bin"
};

// Formats the thumbnail output filename for `pdf_name` / `page_num` into
// `name_buf`, which holds `name_buf_size` bytes. Returns false, after
// reporting on stderr, when formatting fails or the name does not fit.
bool GetThumbnailFilename(char* name_buf,
                          size_t name_buf_size,
                          const char* pdf_name,
                          int page_num,
                          ThumbnailDecodeType decode_type) {
  int length = -1;
  switch (decode_type) {
    case ThumbnailDecodeType::kBitmap:
      length = snprintf(name_buf, name_buf_size, "%s.thumbnail.%d.png",
                        pdf_name, page_num);
      break;
    case ThumbnailDecodeType::kDecodedStream:
      length = snprintf(name_buf, name_buf_size, "%s.thumbnail.decoded.%d.bin",
                        pdf_name, page_num);
      break;
    case ThumbnailDecodeType::kRawStream:
      length = snprintf(name_buf, name_buf_size, "%s.thumbnail.raw.%d.bin",
                        pdf_name, page_num);
      break;
  }

  // snprintf() reports the untruncated length, so a value at or beyond the
  // buffer size means the name was cut short.
  const bool fits = length >= 0 && static_cast<size_t>(length) < name_buf_size;
  if (!fits) {
    fprintf(stderr, "Filename %s for saving is too long.\n", name_buf);
    return false;
  }

  return true;
}
675
// Writes the `buflen` bytes at `buf` to `filename`, replacing any existing
// file. `filetype` is a human-readable label used only in the status messages
// printed to stderr. A `buflen` of 0 creates an empty file and counts as
// success.
void WriteBufferToFile(const void* buf,
                       size_t buflen,
                       const char* filename,
                       const char* filetype) {
  FILE* fp = fopen(filename, "wb");
  if (!fp) {
    // Newline-terminated like every other diagnostic here, so the next
    // message does not run into this one.
    fprintf(stderr, "Failed to open %s for saving %s.\n", filename, filetype);
    return;
  }

  size_t bytes_written = fwrite(buf, 1, buflen, fp);
  if (bytes_written == buflen) {
    fprintf(stderr, "Successfully wrote %s %s.\n", filetype, filename);
  } else {
    fprintf(stderr, "Failed to write to %s.\n", filename);
  }
  fclose(fp);
}
694
EncodeBitmapToPng(ScopedFPDFBitmap bitmap)695 std::vector<uint8_t> EncodeBitmapToPng(ScopedFPDFBitmap bitmap) {
696 std::vector<uint8_t> png_encoding;
697 int format = FPDFBitmap_GetFormat(bitmap.get());
698 if (format == FPDFBitmap_Unknown) {
699 return png_encoding;
700 }
701
702 int width = FPDFBitmap_GetWidth(bitmap.get());
703 int height = FPDFBitmap_GetHeight(bitmap.get());
704 int stride = FPDFBitmap_GetStride(bitmap.get());
705 if (!CheckDimensions(stride, width, height)) {
706 return png_encoding;
707 }
708
709 auto input = pdfium::make_span(
710 static_cast<const uint8_t*>(FPDFBitmap_GetBuffer(bitmap.get())),
711 static_cast<size_t>(stride) * height);
712
713 png_encoding = EncodePng(input, width, height, stride, format);
714 return png_encoding;
715 }
716
WriteAttachments(FPDF_DOCUMENT doc,const std::string & name)717 void WriteAttachments(FPDF_DOCUMENT doc, const std::string& name) {
718 for (int i = 0; i < FPDFDoc_GetAttachmentCount(doc); ++i) {
719 FPDF_ATTACHMENT attachment = FPDFDoc_GetAttachment(doc, i);
720
721 // Retrieve the attachment file name.
722 std::string attachment_name;
723 unsigned long length_bytes = FPDFAttachment_GetName(attachment, nullptr, 0);
724 if (length_bytes) {
725 std::vector<FPDF_WCHAR> buf = GetFPDFWideStringBuffer(length_bytes);
726 unsigned long actual_length_bytes =
727 FPDFAttachment_GetName(attachment, buf.data(), length_bytes);
728 if (actual_length_bytes == length_bytes) {
729 attachment_name = GetPlatformString(buf.data());
730 }
731 }
732 if (attachment_name.empty()) {
733 fprintf(stderr, "Attachment #%d has an empty file name.\n", i + 1);
734 continue;
735 }
736
737 // Calculate the full attachment file name.
738 char save_name[256];
739 int chars_formatted =
740 snprintf(save_name, sizeof(save_name), "%s.attachment.%s", name.c_str(),
741 attachment_name.c_str());
742 if (chars_formatted < 0 ||
743 static_cast<size_t>(chars_formatted) >= sizeof(save_name)) {
744 fprintf(stderr, "Filename %s is too long.\n", save_name);
745 continue;
746 }
747
748 // Retrieve the attachment.
749 if (!FPDFAttachment_GetFile(attachment, nullptr, 0, &length_bytes)) {
750 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
751 attachment_name.c_str());
752 continue;
753 }
754
755 std::vector<char> data_buf(length_bytes);
756 if (length_bytes) {
757 unsigned long actual_length_bytes;
758 if (!FPDFAttachment_GetFile(attachment, data_buf.data(), length_bytes,
759 &actual_length_bytes)) {
760 fprintf(stderr, "Failed to retrieve attachment \"%s\".\n",
761 attachment_name.c_str());
762 continue;
763 }
764 }
765
766 // Write the attachment file. Since a PDF document could have 0-byte files
767 // as attachments, we should allow saving the 0-byte attachments to files.
768 WriteBufferToFile(data_buf.data(), length_bytes, save_name, "attachment");
769 }
770 }
771
WriteImages(FPDF_PAGE page,const char * pdf_name,int page_num)772 void WriteImages(FPDF_PAGE page, const char* pdf_name, int page_num) {
773 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
774 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
775 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
776 continue;
777 }
778
779 ScopedFPDFBitmap bitmap(FPDFImageObj_GetBitmap(obj));
780 if (!bitmap) {
781 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
782 i + 1, page_num + 1);
783 continue;
784 }
785
786 std::string filename =
787 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
788 if (filename.empty()) {
789 continue;
790 }
791
792 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
793 if (png_encoding.empty()) {
794 fprintf(stderr,
795 "Failed to convert image object #%d, on page #%d to png.\n",
796 i + 1, page_num + 1);
797 continue;
798 }
799
800 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
801 filename.c_str(), "image");
802 }
803 }
804
WriteRenderedImages(FPDF_DOCUMENT doc,FPDF_PAGE page,const char * pdf_name,int page_num)805 void WriteRenderedImages(FPDF_DOCUMENT doc,
806 FPDF_PAGE page,
807 const char* pdf_name,
808 int page_num) {
809 for (int i = 0; i < FPDFPage_CountObjects(page); ++i) {
810 FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, i);
811 if (FPDFPageObj_GetType(obj) != FPDF_PAGEOBJ_IMAGE) {
812 continue;
813 }
814
815 ScopedFPDFBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
816 if (!bitmap) {
817 fprintf(stderr, "Image object #%d on page #%d has an empty bitmap.\n",
818 i + 1, page_num + 1);
819 continue;
820 }
821
822 std::string filename =
823 GenerateImageOutputFilename(pdf_name, page_num, i, "png");
824 if (filename.empty()) {
825 continue;
826 }
827
828 std::vector<uint8_t> png_encoding = EncodeBitmapToPng(std::move(bitmap));
829 if (png_encoding.empty()) {
830 fprintf(stderr,
831 "Failed to convert image object #%d, on page #%d to png.\n",
832 i + 1, page_num + 1);
833 continue;
834 }
835
836 WriteBufferToFile(&png_encoding.front(), png_encoding.size(),
837 filename.c_str(), "image");
838 }
839 }
840
WriteDecodedThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)841 void WriteDecodedThumbnailStream(FPDF_PAGE page,
842 const char* pdf_name,
843 int page_num) {
844 char filename[256];
845 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
846 ThumbnailDecodeType::kDecodedStream)) {
847 return;
848 }
849
850 unsigned long decoded_data_size =
851 FPDFPage_GetDecodedThumbnailData(page, nullptr, 0u);
852
853 // Only continue if there actually is a thumbnail for this page
854 if (decoded_data_size == 0) {
855 fprintf(stderr, "Failed to get decoded thumbnail for page #%d.\n",
856 page_num + 1);
857 return;
858 }
859
860 std::vector<uint8_t> thumb_buf(decoded_data_size);
861 if (FPDFPage_GetDecodedThumbnailData(
862 page, thumb_buf.data(), decoded_data_size) != decoded_data_size) {
863 fprintf(stderr, "Failed to get decoded thumbnail data for %s.\n", filename);
864 return;
865 }
866
867 WriteBufferToFile(thumb_buf.data(), decoded_data_size, filename,
868 "decoded thumbnail");
869 }
870
WriteRawThumbnailStream(FPDF_PAGE page,const char * pdf_name,int page_num)871 void WriteRawThumbnailStream(FPDF_PAGE page,
872 const char* pdf_name,
873 int page_num) {
874 char filename[256];
875 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
876 ThumbnailDecodeType::kRawStream)) {
877 return;
878 }
879
880 unsigned long raw_data_size = FPDFPage_GetRawThumbnailData(page, nullptr, 0u);
881
882 // Only continue if there actually is a thumbnail for this page
883 if (raw_data_size == 0) {
884 fprintf(stderr, "Failed to get raw thumbnail data for page #%d.\n",
885 page_num + 1);
886 return;
887 }
888
889 std::vector<uint8_t> thumb_buf(raw_data_size);
890 if (FPDFPage_GetRawThumbnailData(page, thumb_buf.data(), raw_data_size) !=
891 raw_data_size) {
892 fprintf(stderr, "Failed to get raw thumbnail data for %s.\n", filename);
893 return;
894 }
895
896 WriteBufferToFile(thumb_buf.data(), raw_data_size, filename, "raw thumbnail");
897 }
898
WriteThumbnail(FPDF_PAGE page,const char * pdf_name,int page_num)899 void WriteThumbnail(FPDF_PAGE page, const char* pdf_name, int page_num) {
900 char filename[256];
901 if (!GetThumbnailFilename(filename, sizeof(filename), pdf_name, page_num,
902 ThumbnailDecodeType::kBitmap)) {
903 return;
904 }
905
906 ScopedFPDFBitmap thumb_bitmap(FPDFPage_GetThumbnailAsBitmap(page));
907 if (!thumb_bitmap) {
908 fprintf(stderr, "Thumbnail of page #%d has an empty bitmap.\n",
909 page_num + 1);
910 return;
911 }
912
913 std::vector<uint8_t> png_encoding =
914 EncodeBitmapToPng(std::move(thumb_bitmap));
915 if (png_encoding.empty()) {
916 fprintf(stderr, "Failed to convert thumbnail of page #%d to png.\n",
917 page_num + 1);
918 return;
919 }
920
921 WriteBufferToFile(&png_encoding.front(), png_encoding.size(), filename,
922 "thumbnail");
923 }
924