• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "public/fpdf_structtree.h"
6 
7 #include <memory>
8 
9 #include "core/fpdfapi/page/cpdf_page.h"
10 #include "core/fpdfapi/parser/cpdf_array.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfdoc/cpdf_structelement.h"
13 #include "core/fpdfdoc/cpdf_structtree.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "core/fxcrt/stl_util.h"
16 #include "fpdfsdk/cpdfsdk_helpers.h"
17 #include "third_party/base/numerics/safe_conversions.h"
18 
19 namespace {
20 
WideStringToBuffer(const WideString & str,void * buffer,unsigned long buflen)21 unsigned long WideStringToBuffer(const WideString& str,
22                                  void* buffer,
23                                  unsigned long buflen) {
24   if (str.IsEmpty())
25     return 0;
26 
27   ByteString encodedStr = str.ToUTF16LE();
28   const unsigned long len =
29       pdfium::base::checked_cast<unsigned long>(encodedStr.GetLength());
30   if (buffer && len <= buflen)
31     memcpy(buffer, encodedStr.c_str(), len);
32   return len;
33 }
34 
GetMcidFromDict(const CPDF_Dictionary * dict)35 int GetMcidFromDict(const CPDF_Dictionary* dict) {
36   if (dict && dict->GetNameFor("Type") == "MCR") {
37     RetainPtr<const CPDF_Object> obj = dict->GetObjectFor("MCID");
38     if (obj && obj->IsNumber())
39       return obj->GetInteger();
40   }
41   return -1;
42 }
43 
44 }  // namespace
45 
46 FPDF_EXPORT FPDF_STRUCTTREE FPDF_CALLCONV
FPDF_StructTree_GetForPage(FPDF_PAGE page)47 FPDF_StructTree_GetForPage(FPDF_PAGE page) {
48   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
49   if (!pPage)
50     return nullptr;
51 
52   // Caller takes onwership.
53   return FPDFStructTreeFromCPDFStructTree(
54       CPDF_StructTree::LoadPage(pPage->GetDocument(), pPage->GetDict())
55           .release());
56 }
57 
58 FPDF_EXPORT void FPDF_CALLCONV
FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree)59 FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
60   std::unique_ptr<CPDF_StructTree>(
61       CPDFStructTreeFromFPDFStructTree(struct_tree));
62 }
63 
64 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree)65 FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
66   CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
67   if (!tree)
68     return -1;
69 
70   FX_SAFE_INT32 tmp_size = tree->CountTopElements();
71   return tmp_size.ValueOrDefault(-1);
72 }
73 
74 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree,int index)75 FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
76   CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
77   if (!tree || index < 0 ||
78       static_cast<size_t>(index) >= tree->CountTopElements()) {
79     return nullptr;
80   }
81   return FPDFStructElementFromCPDFStructElement(
82       tree->GetTopElement(static_cast<size_t>(index)));
83 }
84 
85 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)86 FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
87                               void* buffer,
88                               unsigned long buflen) {
89   CPDF_StructElement* elem =
90       CPDFStructElementFromFPDFStructElement(struct_element);
91   return elem ? WideStringToBuffer(elem->GetAltText(), buffer, buflen) : 0;
92 }
93 
94 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)95 FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,
96                                  void* buffer,
97                                  unsigned long buflen) {
98   CPDF_StructElement* elem =
99       CPDFStructElementFromFPDFStructElement(struct_element);
100   return elem ? WideStringToBuffer(elem->GetActualText(), buffer, buflen) : 0;
101 }
102 
103 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)104 FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,
105                          void* buffer,
106                          unsigned long buflen) {
107   CPDF_StructElement* elem =
108       CPDFStructElementFromFPDFStructElement(struct_element);
109   if (!elem)
110     return 0;
111   absl::optional<WideString> id = elem->GetID();
112   if (!id.has_value())
113     return 0;
114   return Utf16EncodeMaybeCopyAndReturnLength(id.value(), buffer, buflen);
115 }
116 
117 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)118 FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,
119                            void* buffer,
120                            unsigned long buflen) {
121   CPDF_StructElement* elem =
122       CPDFStructElementFromFPDFStructElement(struct_element);
123   if (!elem)
124     return 0;
125   absl::optional<WideString> lang = elem->GetLang();
126   if (!lang.has_value())
127     return 0;
128   return Utf16EncodeMaybeCopyAndReturnLength(lang.value(), buffer, buflen);
129 }
130 
131 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element)132 FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element) {
133   CPDF_StructElement* elem =
134       CPDFStructElementFromFPDFStructElement(struct_element);
135   if (!elem)
136     return -1;
137   RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
138   if (!attr_obj)
139     return -1;
140   if (attr_obj->IsArray())
141     return fxcrt::CollectionSize<int>(*attr_obj->AsArray());
142   return attr_obj->IsDictionary() ? 1 : -1;
143 }
144 
145 FPDF_EXPORT FPDF_STRUCTELEMENT_ATTR FPDF_CALLCONV
FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,int index)146 FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,
147                                        int index) {
148   CPDF_StructElement* elem =
149       CPDFStructElementFromFPDFStructElement(struct_element);
150   if (!elem)
151     return nullptr;
152 
153   RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
154   if (!attr_obj)
155     return nullptr;
156 
157   if (attr_obj->IsDictionary()) {
158     return index == 0 ? FPDFStructElementAttrFromCPDFDictionary(
159                             attr_obj->AsDictionary())
160                       : nullptr;
161   }
162   if (attr_obj->IsArray()) {
163     const CPDF_Array* array = attr_obj->AsArray();
164     if (index < 0 || static_cast<size_t>(index) >= array->size())
165       return nullptr;
166 
167     // TODO(tsepez): should embedder take a reference here?
168     // Unretained reference in public API. NOLINTNEXTLINE
169     return FPDFStructElementAttrFromCPDFDictionary(array->GetDictAt(index));
170   }
171   return nullptr;
172 }
173 
174 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,FPDF_BYTESTRING attr_name,void * buffer,unsigned long buflen)175 FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,
176                                       FPDF_BYTESTRING attr_name,
177                                       void* buffer,
178                                       unsigned long buflen) {
179   CPDF_StructElement* elem =
180       CPDFStructElementFromFPDFStructElement(struct_element);
181   if (!elem)
182     return 0;
183   RetainPtr<const CPDF_Array> array = ToArray(elem->GetA());
184   if (!array)
185     return 0;
186   CPDF_ArrayLocker locker(array);
187   for (const RetainPtr<CPDF_Object>& obj : locker) {
188     const CPDF_Dictionary* obj_dict = obj->AsDictionary();
189     if (!obj_dict)
190       continue;
191     RetainPtr<const CPDF_Object> attr = obj_dict->GetObjectFor(attr_name);
192     if (!attr || !(attr->IsString() || attr->IsName()))
193       continue;
194     return Utf16EncodeMaybeCopyAndReturnLength(attr->GetUnicodeText(), buffer,
195                                                buflen);
196   }
197   return 0;
198 }
199 
200 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element)201 FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) {
202   CPDF_StructElement* elem =
203       CPDFStructElementFromFPDFStructElement(struct_element);
204   if (!elem)
205     return -1;
206   RetainPtr<const CPDF_Object> p = elem->GetK();
207   return p && p->IsNumber() ? p->GetInteger() : -1;
208 }
209 
210 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)211 FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
212                            void* buffer,
213                            unsigned long buflen) {
214   CPDF_StructElement* elem =
215       CPDFStructElementFromFPDFStructElement(struct_element);
216   return elem ? WideStringToBuffer(
217                     WideString::FromUTF8(elem->GetType().AsStringView()),
218                     buffer, buflen)
219               : 0;
220 }
221 
222 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)223 FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
224                               void* buffer,
225                               unsigned long buflen) {
226   CPDF_StructElement* elem =
227       CPDFStructElementFromFPDFStructElement(struct_element);
228   return elem ? WideStringToBuffer(
229                     WideString::FromUTF8(elem->GetObjType().AsStringView()),
230                     buffer, buflen)
231               : 0;
232 }
233 
234 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)235 FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
236                             void* buffer,
237                             unsigned long buflen) {
238   CPDF_StructElement* elem =
239       CPDFStructElementFromFPDFStructElement(struct_element);
240   return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0;
241 }
242 
243 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element)244 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
245   CPDF_StructElement* elem =
246       CPDFStructElementFromFPDFStructElement(struct_element);
247   if (!elem)
248     return -1;
249 
250   FX_SAFE_INT32 tmp_size = elem->CountKids();
251   return tmp_size.ValueOrDefault(-1);
252 }
253 
254 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,int index)255 FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
256                                    int index) {
257   CPDF_StructElement* elem =
258       CPDFStructElementFromFPDFStructElement(struct_element);
259   if (!elem || index < 0 || static_cast<size_t>(index) >= elem->CountKids())
260     return nullptr;
261 
262   return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
263 }
264 
265 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element)266 FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
267   CPDF_StructElement* elem =
268       CPDFStructElementFromFPDFStructElement(struct_element);
269   CPDF_StructElement* parent = elem ? elem->GetParent() : nullptr;
270   if (!parent) {
271     return nullptr;
272   }
273   return FPDFStructElementFromCPDFStructElement(parent);
274 }
275 
276 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute)277 FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute) {
278   const CPDF_Dictionary* dict =
279       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
280   if (!dict)
281     return -1;
282   return fxcrt::CollectionSize<int>(*dict);
283 }
284 
285 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,int index,void * buffer,unsigned long buflen,unsigned long * out_buflen)286 FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,
287                                 int index,
288                                 void* buffer,
289                                 unsigned long buflen,
290                                 unsigned long* out_buflen) {
291   if (!out_buflen || !buffer)
292     return false;
293 
294   const CPDF_Dictionary* dict =
295       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
296   if (!dict)
297     return false;
298 
299   CPDF_DictionaryLocker locker(dict);
300   for (auto& it : locker) {
301     if (index == 0) {
302       *out_buflen =
303           NulTerminateMaybeCopyAndReturnLength(it.first, buffer, buflen);
304       return true;
305     }
306     --index;
307   }
308   return false;
309 }
310 
311 FPDF_EXPORT FPDF_OBJECT_TYPE FPDF_CALLCONV
FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name)312 FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,
313                                 FPDF_BYTESTRING name) {
314   const CPDF_Dictionary* dict =
315       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
316   if (!dict)
317     return FPDF_OBJECT_UNKNOWN;
318 
319   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
320   return obj ? obj->GetType() : FPDF_OBJECT_UNKNOWN;
321 }
322 
FPDF_StructElement_Attr_GetBooleanValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,FPDF_BOOL * out_value)323 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBooleanValue(
324     FPDF_STRUCTELEMENT_ATTR struct_attribute,
325     FPDF_BYTESTRING name,
326     FPDF_BOOL* out_value) {
327   if (!out_value)
328     return false;
329 
330   const CPDF_Dictionary* dict =
331       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
332   if (!dict)
333     return false;
334 
335   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
336   if (!obj || !obj->IsBoolean())
337     return false;
338 
339   *out_value = obj->GetInteger();
340   return true;
341 }
342 
343 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,float * out_value)344 FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
345                                        FPDF_BYTESTRING name,
346                                        float* out_value) {
347   if (!out_value)
348     return false;
349 
350   const CPDF_Dictionary* dict =
351       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
352   if (!dict)
353     return false;
354 
355   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
356   if (!obj || !obj->IsNumber())
357     return false;
358 
359   *out_value = obj->GetNumber();
360   return true;
361 }
362 
363 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)364 FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
365                                        FPDF_BYTESTRING name,
366                                        void* buffer,
367                                        unsigned long buflen,
368                                        unsigned long* out_buflen) {
369   if (!out_buflen)
370     return false;
371 
372   const CPDF_Dictionary* dict =
373       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
374   if (!dict)
375     return false;
376 
377   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
378   if (!obj || !(obj->IsString() || obj->IsName()))
379     return false;
380 
381   *out_buflen = Utf16EncodeMaybeCopyAndReturnLength(
382       WideString::FromUTF8(obj->GetString().AsStringView()), buffer, buflen);
383   return true;
384 }
385 
386 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)387 FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
388                                      FPDF_BYTESTRING name,
389                                      void* buffer,
390                                      unsigned long buflen,
391                                      unsigned long* out_buflen) {
392   if (!out_buflen)
393     return false;
394 
395   const CPDF_Dictionary* dict =
396       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
397   if (!dict)
398     return false;
399 
400   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
401   if (!obj || !obj->IsString())
402     return false;
403 
404   ByteString result = obj->GetString();
405   const unsigned long len =
406       pdfium::base::checked_cast<unsigned long>(result.GetLength());
407   if (buffer && len <= buflen)
408     memcpy(buffer, result.c_str(), len);
409 
410   *out_buflen = len;
411   return true;
412 }
413 
414 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element)415 FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element) {
416   CPDF_StructElement* elem =
417       CPDFStructElementFromFPDFStructElement(struct_element);
418   if (!elem)
419     return -1;
420   RetainPtr<const CPDF_Object> p = elem->GetK();
421   if (!p)
422     return -1;
423 
424   if (p->IsNumber() || p->IsDictionary())
425     return 1;
426 
427   return p->IsArray() ? fxcrt::CollectionSize<int>(*p->AsArray()) : -1;
428 }
429 
430 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,int index)431 FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,
432                                              int index) {
433   CPDF_StructElement* elem =
434       CPDFStructElementFromFPDFStructElement(struct_element);
435   if (!elem)
436     return -1;
437   RetainPtr<const CPDF_Object> p = elem->GetK();
438   if (!p)
439     return -1;
440 
441   if (p->IsNumber())
442     return index == 0 ? p->GetInteger() : -1;
443 
444   if (p->IsDictionary())
445     return GetMcidFromDict(p->GetDict().Get());
446 
447   if (p->IsArray()) {
448     const CPDF_Array* array = p->AsArray();
449     if (index < 0 || static_cast<size_t>(index) >= array->size())
450       return -1;
451     RetainPtr<const CPDF_Object> array_elem = array->GetObjectAt(index);
452     if (array_elem->IsNumber())
453       return array_elem->GetInteger();
454     if (array_elem->IsDictionary()) {
455       return GetMcidFromDict(array_elem->GetDict().Get());
456     }
457   }
458   return -1;
459 }
460