• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "testing/helpers/dump.h"
6 
7 #include <limits.h>
8 #include <string.h>
9 
10 #include <algorithm>
11 #include <functional>
12 #include <iterator>
13 #include <string>
14 #include <utility>
15 
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_doc.h"
18 #include "public/fpdf_transformpage.h"
19 #include "testing/fx_string_testhelpers.h"
20 
21 using GetBoxInfoFunc =
22     std::function<bool(FPDF_PAGE, float*, float*, float*, float*)>;
23 
24 namespace {
25 
// Returns a std::wstring holding the first |buf_size| 16-bit units found at
// |buf|, each widened to wchar_t. |buf_size| counts array elements, not
// bytes. NUL units are copied like any other unit, so the result may contain
// embedded or trailing NULs.
std::wstring ConvertToWString(const unsigned short* buf,
                              unsigned long buf_size) {
  const unsigned short* const end = buf + buf_size;
  return std::wstring(buf, end);
}
33 
DumpBoxInfo(GetBoxInfoFunc func,const char * box_type,FPDF_PAGE page,int page_idx)34 void DumpBoxInfo(GetBoxInfoFunc func,
35                  const char* box_type,
36                  FPDF_PAGE page,
37                  int page_idx) {
38   FS_RECTF rect;
39   bool ret = func(page, &rect.left, &rect.bottom, &rect.right, &rect.top);
40   if (!ret) {
41     printf("Page %d: No %s.\n", page_idx, box_type);
42     return;
43   }
44   printf("Page %d: %s: %0.2f %0.2f %0.2f %0.2f\n", page_idx, box_type,
45          rect.left, rect.bottom, rect.right, rect.top);
46 }
47 
DumpStructureElementAttributeValues(FPDF_STRUCTELEMENT_ATTR_VALUE attr_value,const char * name,int indent)48 void DumpStructureElementAttributeValues(
49     FPDF_STRUCTELEMENT_ATTR_VALUE attr_value,
50     const char* name,
51     int indent) {
52   if (!attr_value) {
53     printf("%*s FPDF_StructElement_Attr_GetValue failed for %s\n", indent, "",
54            name);
55     return;
56   }
57 
58   FPDF_OBJECT_TYPE type = FPDF_StructElement_Attr_GetType(attr_value);
59   switch (type) {
60     case FPDF_OBJECT_BOOLEAN: {
61       int value;
62       if (FPDF_StructElement_Attr_GetBooleanValue(attr_value, &value)) {
63         printf("%*s %s: %d\n", indent, "", name, value);
64       } else {
65         printf("%*s %s: Failed FPDF_StructElement_Attr_GetBooleanValue\n",
66                indent, "", name);
67       }
68       break;
69     }
70     case FPDF_OBJECT_NUMBER: {
71       float value;
72       if (FPDF_StructElement_Attr_GetNumberValue(attr_value, &value)) {
73         printf("%*s %s: %f\n", indent, "", name, value);
74       } else {
75         printf("%*s %s: Failed FPDF_StructElement_Attr_GetNumberValue\n",
76                indent, "", name);
77       }
78       break;
79     }
80     case FPDF_OBJECT_STRING:
81     case FPDF_OBJECT_NAME: {
82       static const size_t kBufSize = 1024;
83       unsigned short buffer[kBufSize];
84       unsigned long len;
85       if (FPDF_StructElement_Attr_GetStringValue(attr_value, buffer,
86                                                  sizeof(buffer), &len)) {
87         printf("%*s %s: %ls\n", indent, "", name,
88                ConvertToWString(buffer, len).c_str());
89       } else {
90         printf("%*s %s: Failed FPDF_StructElement_Attr_GetStringValue\n",
91                indent, "", name);
92       }
93       break;
94     }
95     case FPDF_OBJECT_ARRAY: {
96       printf("%*s %s:\n", indent, "", name);
97       int count = FPDF_StructElement_Attr_CountChildren(attr_value);
98       for (int i = 0; i < count; ++i) {
99         DumpStructureElementAttributeValues(
100             FPDF_StructElement_Attr_GetChildAtIndex(attr_value, i), name,
101             indent + 2);
102       }
103       break;
104     }
105     case FPDF_OBJECT_UNKNOWN: {
106       printf("%*s %s: FPDF_OBJECT_UNKNOWN\n", indent, "", name);
107       break;
108     }
109     default: {
110       printf("%*s %s: NOT_YET_IMPLEMENTED: %d\n", indent, "", name, type);
111       break;
112     }
113   }
114 }
115 
DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr,int indent)116 void DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr, int indent) {
117   static const size_t kBufSize = 1024;
118   int count = FPDF_StructElement_Attr_GetCount(attr);
119   for (int i = 0; i < count; ++i) {
120     char name[kBufSize];
121     unsigned long len;
122     if (!FPDF_StructElement_Attr_GetName(attr, i, name, sizeof(name), &len)) {
123       printf("%*s FPDF_StructElement_Attr_GetName failed for %d\n", indent, "",
124              i);
125       continue;
126     }
127 
128     DumpStructureElementAttributeValues(
129         FPDF_StructElement_Attr_GetValue(attr, name), name, indent);
130   }
131 }
132 
133 }  // namespace
134 
DumpChildStructure(FPDF_STRUCTELEMENT child,int indent)135 void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
136   static const size_t kBufSize = 1024;
137   unsigned short buf[kBufSize];
138   unsigned long len = FPDF_StructElement_GetType(child, buf, kBufSize);
139   if (len > 0) {
140     printf("%*s S: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
141   }
142 
143   int attr_count = FPDF_StructElement_GetAttributeCount(child);
144   for (int i = 0; i < attr_count; i++) {
145     FPDF_STRUCTELEMENT_ATTR child_attr =
146         FPDF_StructElement_GetAttributeAtIndex(child, i);
147     if (!child_attr) {
148       continue;
149     }
150     printf("%*s A[%d]:\n", indent * 2, "", i);
151     DumpStructureElementAttributes(child_attr, indent * 2 + 2);
152   }
153 
154   memset(buf, 0, sizeof(buf));
155   len = FPDF_StructElement_GetActualText(child, buf, kBufSize);
156   if (len > 0) {
157     printf("%*s ActualText: %ls\n", indent * 2, "",
158            ConvertToWString(buf, len).c_str());
159   }
160 
161   memset(buf, 0, sizeof(buf));
162   len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
163   if (len > 0) {
164     printf("%*s AltText: %ls\n", indent * 2, "",
165            ConvertToWString(buf, len).c_str());
166   }
167 
168   memset(buf, 0, sizeof(buf));
169   len = FPDF_StructElement_GetID(child, buf, kBufSize);
170   if (len > 0) {
171     printf("%*s ID: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
172   }
173 
174   memset(buf, 0, sizeof(buf));
175   len = FPDF_StructElement_GetLang(child, buf, kBufSize);
176   if (len > 0) {
177     printf("%*s Lang: %ls\n", indent * 2, "",
178            ConvertToWString(buf, len).c_str());
179   }
180 
181   const int mcid_count = FPDF_StructElement_GetMarkedContentIdCount(child);
182   for (int i = 0; i < mcid_count; ++i) {
183     int mcid = FPDF_StructElement_GetMarkedContentIdAtIndex(child, i);
184     if (mcid != -1) {
185       printf("%*s MCID%d: %d\n", indent * 2, "", i, mcid);
186     }
187   }
188 
189   FPDF_STRUCTELEMENT parent = FPDF_StructElement_GetParent(child);
190   if (parent) {
191     memset(buf, 0, sizeof(buf));
192     len = FPDF_StructElement_GetID(parent, buf, kBufSize);
193     if (len > 0) {
194       printf("%*s Parent ID: %ls\n", indent * 2, "",
195              ConvertToWString(buf, len).c_str());
196     }
197   }
198 
199   memset(buf, 0, sizeof(buf));
200   len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
201   if (len > 0) {
202     printf("%*s Title: %ls\n", indent * 2, "",
203            ConvertToWString(buf, len).c_str());
204   }
205 
206   memset(buf, 0, sizeof(buf));
207   len = FPDF_StructElement_GetObjType(child, buf, kBufSize);
208   if (len > 0) {
209     printf("%*s Type: %ls\n", indent * 2, "",
210            ConvertToWString(buf, len).c_str());
211   }
212 
213   for (int i = 0; i < FPDF_StructElement_CountChildren(child); ++i) {
214     FPDF_STRUCTELEMENT sub_child = FPDF_StructElement_GetChildAtIndex(child, i);
215     // If the child is not an Element then this will return null. This can
216     // happen if the element is things like an object reference or a stream.
217     if (!sub_child) {
218       continue;
219     }
220 
221     DumpChildStructure(sub_child, indent + 1);
222   }
223 }
224 
DumpPageInfo(FPDF_PAGE page,int page_idx)225 void DumpPageInfo(FPDF_PAGE page, int page_idx) {
226   DumpBoxInfo(&FPDFPage_GetMediaBox, "MediaBox", page, page_idx);
227   DumpBoxInfo(&FPDFPage_GetCropBox, "CropBox", page, page_idx);
228   DumpBoxInfo(&FPDFPage_GetBleedBox, "BleedBox", page, page_idx);
229   DumpBoxInfo(&FPDFPage_GetTrimBox, "TrimBox", page, page_idx);
230   DumpBoxInfo(&FPDFPage_GetArtBox, "ArtBox", page, page_idx);
231 }
232 
DumpPageStructure(FPDF_PAGE page,int page_idx)233 void DumpPageStructure(FPDF_PAGE page, int page_idx) {
234   ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
235   if (!tree) {
236     fprintf(stderr, "Failed to load struct tree for page %d\n", page_idx);
237     return;
238   }
239 
240   printf("Structure Tree for Page %d\n", page_idx);
241   for (int i = 0; i < FPDF_StructTree_CountChildren(tree.get()); ++i) {
242     FPDF_STRUCTELEMENT child = FPDF_StructTree_GetChildAtIndex(tree.get(), i);
243     if (!child) {
244       fprintf(stderr, "Failed to load child %d for page %d\n", i, page_idx);
245       continue;
246     }
247     DumpChildStructure(child, 0);
248   }
249   printf("\n\n");
250 }
251 
DumpMetaData(FPDF_DOCUMENT doc)252 void DumpMetaData(FPDF_DOCUMENT doc) {
253   static constexpr const char* kMetaTags[] = {
254       "Title",   "Author",   "Subject",      "Keywords",
255       "Creator", "Producer", "CreationDate", "ModDate"};
256   for (const char* meta_tag : kMetaTags) {
257     char meta_buffer[4096];
258     unsigned long len =
259         FPDF_GetMetaText(doc, meta_tag, meta_buffer, sizeof(meta_buffer));
260     if (!len) {
261       continue;
262     }
263 
264     auto* meta_string = reinterpret_cast<unsigned short*>(meta_buffer);
265     printf("%-12s = %ls (%lu bytes)\n", meta_tag,
266            GetPlatformWString(meta_string).c_str(), len);
267   }
268 }
269