• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "samples/pdfium_test_dump_helper.h"
6 
7 #include <limits.h>
8 #include <string.h>
9 
10 #include <algorithm>
11 #include <functional>
12 #include <iterator>
13 #include <string>
14 #include <utility>
15 
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_doc.h"
18 #include "public/fpdf_transformpage.h"
19 #include "testing/fx_string_testhelpers.h"
20 
21 using GetBoxInfoFunc =
22     std::function<bool(FPDF_PAGE, float*, float*, float*, float*)>;
23 
24 namespace {
25 
// Builds a std::wstring from the first |buf_size| elements of |buf|, widening
// each 16-bit value to one wchar_t. |buf_size| is an element count, not a byte
// count. Every element is copied as-is, including any NUL values.
std::wstring ConvertToWString(const unsigned short* buf,
                              unsigned long buf_size) {
  return std::wstring(buf, buf + buf_size);
}
33 
DumpBoxInfo(GetBoxInfoFunc func,const char * box_type,FPDF_PAGE page,int page_idx)34 void DumpBoxInfo(GetBoxInfoFunc func,
35                  const char* box_type,
36                  FPDF_PAGE page,
37                  int page_idx) {
38   FS_RECTF rect;
39   bool ret = func(page, &rect.left, &rect.bottom, &rect.right, &rect.top);
40   if (!ret) {
41     printf("Page %d: No %s.\n", page_idx, box_type);
42     return;
43   }
44   printf("Page %d: %s: %0.2f %0.2f %0.2f %0.2f\n", page_idx, box_type,
45          rect.left, rect.bottom, rect.right, rect.top);
46 }
47 
DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr,int indent)48 void DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr, int indent) {
49   static const size_t kBufSize = 1024;
50   int count = FPDF_StructElement_Attr_GetCount(attr);
51   for (int i = 0; i < count; i++) {
52     char name[kBufSize] = {};
53     unsigned long len = ULONG_MAX;
54     if (!FPDF_StructElement_Attr_GetName(attr, i, name, sizeof(name), &len)) {
55       printf("%*s FPDF_StructElement_Attr_GetName failed for %d\n", indent, "",
56              i);
57       continue;
58     }
59 
60     FPDF_OBJECT_TYPE type = FPDF_StructElement_Attr_GetType(attr, name);
61     if (type == FPDF_OBJECT_BOOLEAN) {
62       int value;
63       if (!FPDF_StructElement_Attr_GetBooleanValue(attr, name, &value)) {
64         printf("%*s %s: Failed FPDF_StructElement_Attr_GetBooleanValue\n",
65                indent, "", name);
66         continue;
67       }
68       printf("%*s %s: %d\n", indent, "", name, value);
69     } else if (type == FPDF_OBJECT_NUMBER) {
70       float value;
71       if (!FPDF_StructElement_Attr_GetNumberValue(attr, name, &value)) {
72         printf("%*s %s: Failed FPDF_StructElement_Attr_GetNumberValue\n",
73                indent, "", name);
74         continue;
75       }
76       printf("%*s %s: %f\n", indent, "", name, value);
77     } else if (type == FPDF_OBJECT_STRING || type == FPDF_OBJECT_NAME) {
78       unsigned short buffer[kBufSize] = {};
79       if (!FPDF_StructElement_Attr_GetStringValue(attr, name, buffer,
80                                                   sizeof(buffer), &len)) {
81         printf("%*s %s: Failed FPDF_StructElement_Attr_GetStringValue\n",
82                indent, "", name);
83         continue;
84       }
85       printf("%*s %s: %ls\n", indent, "", name,
86              ConvertToWString(buffer, len).c_str());
87     } else if (type == FPDF_OBJECT_UNKNOWN) {
88       printf("%*s %s: FPDF_OBJECT_UNKNOWN\n", indent, "", name);
89     } else {
90       printf("%*s %s: NOT_YET_IMPLEMENTED: %d\n", indent, "", name, type);
91     }
92   }
93 }
94 
95 }  // namespace
96 
DumpChildStructure(FPDF_STRUCTELEMENT child,int indent)97 void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
98   static const size_t kBufSize = 1024;
99   unsigned short buf[kBufSize];
100   unsigned long len = FPDF_StructElement_GetType(child, buf, kBufSize);
101   if (len > 0)
102     printf("%*s S: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
103 
104   int attr_count = FPDF_StructElement_GetAttributeCount(child);
105   for (int i = 0; i < attr_count; i++) {
106     FPDF_STRUCTELEMENT_ATTR child_attr =
107         FPDF_StructElement_GetAttributeAtIndex(child, i);
108     if (!child_attr)
109       continue;
110     printf("%*s A[%d]:\n", indent * 2, "", i);
111     DumpStructureElementAttributes(child_attr, indent * 2 + 2);
112   }
113 
114   memset(buf, 0, sizeof(buf));
115   len = FPDF_StructElement_GetActualText(child, buf, kBufSize);
116   if (len > 0) {
117     printf("%*s ActualText: %ls\n", indent * 2, "",
118            ConvertToWString(buf, len).c_str());
119   }
120 
121   memset(buf, 0, sizeof(buf));
122   len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
123   if (len > 0) {
124     printf("%*s AltText: %ls\n", indent * 2, "",
125            ConvertToWString(buf, len).c_str());
126   }
127 
128   memset(buf, 0, sizeof(buf));
129   len = FPDF_StructElement_GetID(child, buf, kBufSize);
130   if (len > 0)
131     printf("%*s ID: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
132 
133   memset(buf, 0, sizeof(buf));
134   len = FPDF_StructElement_GetLang(child, buf, kBufSize);
135   if (len > 0) {
136     printf("%*s Lang: %ls\n", indent * 2, "",
137            ConvertToWString(buf, len).c_str());
138   }
139 
140   int mcid = FPDF_StructElement_GetMarkedContentID(child);
141   if (mcid != -1)
142     printf("%*s MCID: %d\n", indent * 2, "", mcid);
143 
144   FPDF_STRUCTELEMENT parent = FPDF_StructElement_GetParent(child);
145   if (parent) {
146     memset(buf, 0, sizeof(buf));
147     len = FPDF_StructElement_GetID(parent, buf, kBufSize);
148     if (len > 0) {
149       printf("%*s Parent ID: %ls\n", indent * 2, "",
150              ConvertToWString(buf, len).c_str());
151     }
152   }
153 
154   memset(buf, 0, sizeof(buf));
155   len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
156   if (len > 0) {
157     printf("%*s Title: %ls\n", indent * 2, "",
158            ConvertToWString(buf, len).c_str());
159   }
160 
161   memset(buf, 0, sizeof(buf));
162   len = FPDF_StructElement_GetObjType(child, buf, kBufSize);
163   if (len > 0) {
164     printf("%*s Type: %ls\n", indent * 2, "",
165            ConvertToWString(buf, len).c_str());
166   }
167 
168   for (int i = 0; i < FPDF_StructElement_CountChildren(child); ++i) {
169     FPDF_STRUCTELEMENT sub_child = FPDF_StructElement_GetChildAtIndex(child, i);
170     // If the child is not an Element then this will return null. This can
171     // happen if the element is things like an object reference or a stream.
172     if (!sub_child)
173       continue;
174 
175     DumpChildStructure(sub_child, indent + 1);
176   }
177 }
178 
DumpPageInfo(FPDF_PAGE page,int page_idx)179 void DumpPageInfo(FPDF_PAGE page, int page_idx) {
180   DumpBoxInfo(&FPDFPage_GetMediaBox, "MediaBox", page, page_idx);
181   DumpBoxInfo(&FPDFPage_GetCropBox, "CropBox", page, page_idx);
182   DumpBoxInfo(&FPDFPage_GetBleedBox, "BleedBox", page, page_idx);
183   DumpBoxInfo(&FPDFPage_GetTrimBox, "TrimBox", page, page_idx);
184   DumpBoxInfo(&FPDFPage_GetArtBox, "ArtBox", page, page_idx);
185 }
186 
DumpPageStructure(FPDF_PAGE page,int page_idx)187 void DumpPageStructure(FPDF_PAGE page, int page_idx) {
188   ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
189   if (!tree) {
190     fprintf(stderr, "Failed to load struct tree for page %d\n", page_idx);
191     return;
192   }
193 
194   printf("Structure Tree for Page %d\n", page_idx);
195   for (int i = 0; i < FPDF_StructTree_CountChildren(tree.get()); ++i) {
196     FPDF_STRUCTELEMENT child = FPDF_StructTree_GetChildAtIndex(tree.get(), i);
197     if (!child) {
198       fprintf(stderr, "Failed to load child %d for page %d\n", i, page_idx);
199       continue;
200     }
201     DumpChildStructure(child, 0);
202   }
203   printf("\n\n");
204 }
205 
DumpMetaData(FPDF_DOCUMENT doc)206 void DumpMetaData(FPDF_DOCUMENT doc) {
207   static constexpr const char* kMetaTags[] = {
208       "Title",   "Author",   "Subject",      "Keywords",
209       "Creator", "Producer", "CreationDate", "ModDate"};
210   for (const char* meta_tag : kMetaTags) {
211     char meta_buffer[4096];
212     unsigned long len =
213         FPDF_GetMetaText(doc, meta_tag, meta_buffer, sizeof(meta_buffer));
214     if (!len)
215       continue;
216 
217     auto* meta_string = reinterpret_cast<unsigned short*>(meta_buffer);
218     printf("%-12s = %ls (%lu bytes)\n", meta_tag,
219            GetPlatformWString(meta_string).c_str(), len);
220   }
221 }
222