1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "public/fpdf_structtree.h"
6
7 #include <memory>
8
9 #include "core/fpdfapi/page/cpdf_page.h"
10 #include "core/fpdfapi/parser/cpdf_array.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfdoc/cpdf_structelement.h"
13 #include "core/fpdfdoc/cpdf_structtree.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "core/fxcrt/stl_util.h"
16 #include "fpdfsdk/cpdfsdk_helpers.h"
17 #include "third_party/base/numerics/safe_conversions.h"
18
19 namespace {
20
WideStringToBuffer(const WideString & str,void * buffer,unsigned long buflen)21 unsigned long WideStringToBuffer(const WideString& str,
22 void* buffer,
23 unsigned long buflen) {
24 if (str.IsEmpty())
25 return 0;
26
27 ByteString encodedStr = str.ToUTF16LE();
28 const unsigned long len =
29 pdfium::base::checked_cast<unsigned long>(encodedStr.GetLength());
30 if (buffer && len <= buflen)
31 memcpy(buffer, encodedStr.c_str(), len);
32 return len;
33 }
34
GetMcidFromDict(const CPDF_Dictionary * dict)35 int GetMcidFromDict(const CPDF_Dictionary* dict) {
36 if (dict && dict->GetNameFor("Type") == "MCR") {
37 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor("MCID");
38 if (obj && obj->IsNumber())
39 return obj->GetInteger();
40 }
41 return -1;
42 }
43
44 } // namespace
45
46 FPDF_EXPORT FPDF_STRUCTTREE FPDF_CALLCONV
FPDF_StructTree_GetForPage(FPDF_PAGE page)47 FPDF_StructTree_GetForPage(FPDF_PAGE page) {
48 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
49 if (!pPage)
50 return nullptr;
51
52 // Caller takes onwership.
53 return FPDFStructTreeFromCPDFStructTree(
54 CPDF_StructTree::LoadPage(pPage->GetDocument(), pPage->GetDict())
55 .release());
56 }
57
58 FPDF_EXPORT void FPDF_CALLCONV
FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree)59 FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
60 std::unique_ptr<CPDF_StructTree>(
61 CPDFStructTreeFromFPDFStructTree(struct_tree));
62 }
63
64 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree)65 FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
66 CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
67 if (!tree)
68 return -1;
69
70 FX_SAFE_INT32 tmp_size = tree->CountTopElements();
71 return tmp_size.ValueOrDefault(-1);
72 }
73
74 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree,int index)75 FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
76 CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
77 if (!tree || index < 0 ||
78 static_cast<size_t>(index) >= tree->CountTopElements()) {
79 return nullptr;
80 }
81 return FPDFStructElementFromCPDFStructElement(
82 tree->GetTopElement(static_cast<size_t>(index)));
83 }
84
85 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)86 FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
87 void* buffer,
88 unsigned long buflen) {
89 CPDF_StructElement* elem =
90 CPDFStructElementFromFPDFStructElement(struct_element);
91 return elem ? WideStringToBuffer(elem->GetAltText(), buffer, buflen) : 0;
92 }
93
94 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)95 FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,
96 void* buffer,
97 unsigned long buflen) {
98 CPDF_StructElement* elem =
99 CPDFStructElementFromFPDFStructElement(struct_element);
100 return elem ? WideStringToBuffer(elem->GetActualText(), buffer, buflen) : 0;
101 }
102
103 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)104 FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,
105 void* buffer,
106 unsigned long buflen) {
107 CPDF_StructElement* elem =
108 CPDFStructElementFromFPDFStructElement(struct_element);
109 if (!elem)
110 return 0;
111 absl::optional<WideString> id = elem->GetID();
112 if (!id.has_value())
113 return 0;
114 return Utf16EncodeMaybeCopyAndReturnLength(id.value(), buffer, buflen);
115 }
116
117 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)118 FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,
119 void* buffer,
120 unsigned long buflen) {
121 CPDF_StructElement* elem =
122 CPDFStructElementFromFPDFStructElement(struct_element);
123 if (!elem)
124 return 0;
125 absl::optional<WideString> lang = elem->GetLang();
126 if (!lang.has_value())
127 return 0;
128 return Utf16EncodeMaybeCopyAndReturnLength(lang.value(), buffer, buflen);
129 }
130
131 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element)132 FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element) {
133 CPDF_StructElement* elem =
134 CPDFStructElementFromFPDFStructElement(struct_element);
135 if (!elem)
136 return -1;
137 RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
138 if (!attr_obj)
139 return -1;
140 if (attr_obj->IsArray())
141 return fxcrt::CollectionSize<int>(*attr_obj->AsArray());
142 return attr_obj->IsDictionary() ? 1 : -1;
143 }
144
145 FPDF_EXPORT FPDF_STRUCTELEMENT_ATTR FPDF_CALLCONV
FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,int index)146 FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,
147 int index) {
148 CPDF_StructElement* elem =
149 CPDFStructElementFromFPDFStructElement(struct_element);
150 if (!elem)
151 return nullptr;
152
153 RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
154 if (!attr_obj)
155 return nullptr;
156
157 if (attr_obj->IsDictionary()) {
158 return index == 0 ? FPDFStructElementAttrFromCPDFDictionary(
159 attr_obj->AsDictionary())
160 : nullptr;
161 }
162 if (attr_obj->IsArray()) {
163 const CPDF_Array* array = attr_obj->AsArray();
164 if (index < 0 || static_cast<size_t>(index) >= array->size())
165 return nullptr;
166
167 // TODO(tsepez): should embedder take a reference here?
168 // Unretained reference in public API. NOLINTNEXTLINE
169 return FPDFStructElementAttrFromCPDFDictionary(array->GetDictAt(index));
170 }
171 return nullptr;
172 }
173
174 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,FPDF_BYTESTRING attr_name,void * buffer,unsigned long buflen)175 FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,
176 FPDF_BYTESTRING attr_name,
177 void* buffer,
178 unsigned long buflen) {
179 CPDF_StructElement* elem =
180 CPDFStructElementFromFPDFStructElement(struct_element);
181 if (!elem)
182 return 0;
183 RetainPtr<const CPDF_Array> array = ToArray(elem->GetA());
184 if (!array)
185 return 0;
186 CPDF_ArrayLocker locker(array);
187 for (const RetainPtr<CPDF_Object>& obj : locker) {
188 const CPDF_Dictionary* obj_dict = obj->AsDictionary();
189 if (!obj_dict)
190 continue;
191 RetainPtr<const CPDF_Object> attr = obj_dict->GetObjectFor(attr_name);
192 if (!attr || !(attr->IsString() || attr->IsName()))
193 continue;
194 return Utf16EncodeMaybeCopyAndReturnLength(attr->GetUnicodeText(), buffer,
195 buflen);
196 }
197 return 0;
198 }
199
200 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element)201 FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) {
202 CPDF_StructElement* elem =
203 CPDFStructElementFromFPDFStructElement(struct_element);
204 if (!elem)
205 return -1;
206 RetainPtr<const CPDF_Object> p = elem->GetK();
207 return p && p->IsNumber() ? p->GetInteger() : -1;
208 }
209
210 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)211 FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
212 void* buffer,
213 unsigned long buflen) {
214 CPDF_StructElement* elem =
215 CPDFStructElementFromFPDFStructElement(struct_element);
216 return elem ? WideStringToBuffer(
217 WideString::FromUTF8(elem->GetType().AsStringView()),
218 buffer, buflen)
219 : 0;
220 }
221
222 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)223 FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
224 void* buffer,
225 unsigned long buflen) {
226 CPDF_StructElement* elem =
227 CPDFStructElementFromFPDFStructElement(struct_element);
228 return elem ? WideStringToBuffer(
229 WideString::FromUTF8(elem->GetObjType().AsStringView()),
230 buffer, buflen)
231 : 0;
232 }
233
234 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)235 FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
236 void* buffer,
237 unsigned long buflen) {
238 CPDF_StructElement* elem =
239 CPDFStructElementFromFPDFStructElement(struct_element);
240 return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0;
241 }
242
243 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element)244 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
245 CPDF_StructElement* elem =
246 CPDFStructElementFromFPDFStructElement(struct_element);
247 if (!elem)
248 return -1;
249
250 FX_SAFE_INT32 tmp_size = elem->CountKids();
251 return tmp_size.ValueOrDefault(-1);
252 }
253
254 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,int index)255 FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
256 int index) {
257 CPDF_StructElement* elem =
258 CPDFStructElementFromFPDFStructElement(struct_element);
259 if (!elem || index < 0 || static_cast<size_t>(index) >= elem->CountKids())
260 return nullptr;
261
262 return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
263 }
264
265 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element)266 FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
267 CPDF_StructElement* elem =
268 CPDFStructElementFromFPDFStructElement(struct_element);
269 CPDF_StructElement* parent = elem ? elem->GetParent() : nullptr;
270 if (!parent) {
271 return nullptr;
272 }
273 return FPDFStructElementFromCPDFStructElement(parent);
274 }
275
276 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute)277 FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute) {
278 const CPDF_Dictionary* dict =
279 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
280 if (!dict)
281 return -1;
282 return fxcrt::CollectionSize<int>(*dict);
283 }
284
285 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,int index,void * buffer,unsigned long buflen,unsigned long * out_buflen)286 FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,
287 int index,
288 void* buffer,
289 unsigned long buflen,
290 unsigned long* out_buflen) {
291 if (!out_buflen || !buffer)
292 return false;
293
294 const CPDF_Dictionary* dict =
295 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
296 if (!dict)
297 return false;
298
299 CPDF_DictionaryLocker locker(dict);
300 for (auto& it : locker) {
301 if (index == 0) {
302 *out_buflen =
303 NulTerminateMaybeCopyAndReturnLength(it.first, buffer, buflen);
304 return true;
305 }
306 --index;
307 }
308 return false;
309 }
310
311 FPDF_EXPORT FPDF_OBJECT_TYPE FPDF_CALLCONV
FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name)312 FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,
313 FPDF_BYTESTRING name) {
314 const CPDF_Dictionary* dict =
315 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
316 if (!dict)
317 return FPDF_OBJECT_UNKNOWN;
318
319 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
320 return obj ? obj->GetType() : FPDF_OBJECT_UNKNOWN;
321 }
322
FPDF_StructElement_Attr_GetBooleanValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,FPDF_BOOL * out_value)323 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBooleanValue(
324 FPDF_STRUCTELEMENT_ATTR struct_attribute,
325 FPDF_BYTESTRING name,
326 FPDF_BOOL* out_value) {
327 if (!out_value)
328 return false;
329
330 const CPDF_Dictionary* dict =
331 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
332 if (!dict)
333 return false;
334
335 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
336 if (!obj || !obj->IsBoolean())
337 return false;
338
339 *out_value = obj->GetInteger();
340 return true;
341 }
342
343 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,float * out_value)344 FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
345 FPDF_BYTESTRING name,
346 float* out_value) {
347 if (!out_value)
348 return false;
349
350 const CPDF_Dictionary* dict =
351 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
352 if (!dict)
353 return false;
354
355 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
356 if (!obj || !obj->IsNumber())
357 return false;
358
359 *out_value = obj->GetNumber();
360 return true;
361 }
362
363 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)364 FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
365 FPDF_BYTESTRING name,
366 void* buffer,
367 unsigned long buflen,
368 unsigned long* out_buflen) {
369 if (!out_buflen)
370 return false;
371
372 const CPDF_Dictionary* dict =
373 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
374 if (!dict)
375 return false;
376
377 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
378 if (!obj || !(obj->IsString() || obj->IsName()))
379 return false;
380
381 *out_buflen = Utf16EncodeMaybeCopyAndReturnLength(
382 WideString::FromUTF8(obj->GetString().AsStringView()), buffer, buflen);
383 return true;
384 }
385
386 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)387 FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
388 FPDF_BYTESTRING name,
389 void* buffer,
390 unsigned long buflen,
391 unsigned long* out_buflen) {
392 if (!out_buflen)
393 return false;
394
395 const CPDF_Dictionary* dict =
396 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
397 if (!dict)
398 return false;
399
400 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
401 if (!obj || !obj->IsString())
402 return false;
403
404 ByteString result = obj->GetString();
405 const unsigned long len =
406 pdfium::base::checked_cast<unsigned long>(result.GetLength());
407 if (buffer && len <= buflen)
408 memcpy(buffer, result.c_str(), len);
409
410 *out_buflen = len;
411 return true;
412 }
413
414 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element)415 FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element) {
416 CPDF_StructElement* elem =
417 CPDFStructElementFromFPDFStructElement(struct_element);
418 if (!elem)
419 return -1;
420 RetainPtr<const CPDF_Object> p = elem->GetK();
421 if (!p)
422 return -1;
423
424 if (p->IsNumber() || p->IsDictionary())
425 return 1;
426
427 return p->IsArray() ? fxcrt::CollectionSize<int>(*p->AsArray()) : -1;
428 }
429
430 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,int index)431 FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,
432 int index) {
433 CPDF_StructElement* elem =
434 CPDFStructElementFromFPDFStructElement(struct_element);
435 if (!elem)
436 return -1;
437 RetainPtr<const CPDF_Object> p = elem->GetK();
438 if (!p)
439 return -1;
440
441 if (p->IsNumber())
442 return index == 0 ? p->GetInteger() : -1;
443
444 if (p->IsDictionary())
445 return GetMcidFromDict(p->GetDict().Get());
446
447 if (p->IsArray()) {
448 const CPDF_Array* array = p->AsArray();
449 if (index < 0 || static_cast<size_t>(index) >= array->size())
450 return -1;
451 RetainPtr<const CPDF_Object> array_elem = array->GetObjectAt(index);
452 if (array_elem->IsNumber())
453 return array_elem->GetInteger();
454 if (array_elem->IsDictionary()) {
455 return GetMcidFromDict(array_elem->GetDict().Get());
456 }
457 }
458 return -1;
459 }
460