1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include <map>
8 #include <memory>
9 #include <utility>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_document.h"
14 #include "core/fpdfapi/parser/cpdf_name.h"
15 #include "core/fpdfapi/parser/cpdf_number.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfdoc/cpdf_numbertree.h"
19 #include "core/fpdfdoc/fpdf_tagged.h"
20 #include "core/fpdfdoc/tagged_int.h"
21 #include "third_party/base/ptr_util.h"
22
23 namespace {
24
25 const int nMaxRecursion = 32;
26
IsTagged(const CPDF_Document * pDoc)27 bool IsTagged(const CPDF_Document* pDoc) {
28 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
29 CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
30 return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
31 }
32
33 } // namespace
34
CPDF_StructKid()35 CPDF_StructKid::CPDF_StructKid()
36 : m_Type(Invalid),
37 m_pDict(nullptr),
38 m_PageObjNum(0),
39 m_RefObjNum(0),
40 m_ContentId(0) {}
41
42 CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default;
43
~CPDF_StructKid()44 CPDF_StructKid::~CPDF_StructKid() {}
45
46 // static
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)47 std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage(
48 const CPDF_Document* pDoc,
49 const CPDF_Dictionary* pPageDict) {
50 if (!IsTagged(pDoc))
51 return nullptr;
52
53 auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
54 pTree->LoadPageTree(pPageDict);
55 return std::move(pTree);
56 }
57
CPDF_StructTree(const CPDF_Document * pDoc)58 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
59 : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
60 m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr),
61 m_pPage(nullptr) {}
62
~CPDF_StructTree()63 CPDF_StructTree::~CPDF_StructTree() {}
64
CountTopElements() const65 int CPDF_StructTree::CountTopElements() const {
66 return pdfium::CollectionSize<int>(m_Kids);
67 }
68
GetTopElement(int i) const69 IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const {
70 return m_Kids[i].Get();
71 }
72
LoadPageTree(const CPDF_Dictionary * pPageDict)73 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
74 m_pPage = pPageDict;
75 if (!m_pTreeRoot)
76 return;
77
78 CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
79 if (!pKids)
80 return;
81
82 uint32_t dwKids = 0;
83 if (pKids->IsDictionary())
84 dwKids = 1;
85 else if (CPDF_Array* pArray = pKids->AsArray())
86 dwKids = pArray->GetCount();
87 else
88 return;
89
90 m_Kids.clear();
91 m_Kids.resize(dwKids);
92 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
93 if (!pParentTree)
94 return;
95
96 CPDF_NumberTree parent_tree(pParentTree);
97 int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
98 if (parents_id < 0)
99 return;
100
101 CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
102 if (!pParentArray)
103 return;
104
105 std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map;
106 for (size_t i = 0; i < pParentArray->GetCount(); i++) {
107 if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
108 AddPageNode(pParent, &element_map);
109 }
110 }
111
AddPageNode(CPDF_Dictionary * pDict,std::map<CPDF_Dictionary *,CFX_RetainPtr<CPDF_StructElement>> * map,int nLevel)112 CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
113 CPDF_Dictionary* pDict,
114 std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
115 int nLevel) {
116 if (nLevel > nMaxRecursion)
117 return nullptr;
118
119 auto it = map->find(pDict);
120 if (it != map->end())
121 return it->second;
122
123 auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
124 (*map)[pDict] = pElement;
125 CPDF_Dictionary* pParent = pDict->GetDictFor("P");
126 if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
127 if (!AddTopLevelNode(pDict, pElement))
128 map->erase(pDict);
129 return pElement;
130 }
131
132 CFX_RetainPtr<CPDF_StructElement> pParentElement =
133 AddPageNode(pParent, map, nLevel + 1);
134 bool bSave = false;
135 for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
136 if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
137 kid.m_pElement = pElement;
138 bSave = true;
139 }
140 }
141 if (!bSave)
142 map->erase(pDict);
143 return pElement;
144 }
145
AddTopLevelNode(CPDF_Dictionary * pDict,const CFX_RetainPtr<CPDF_StructElement> & pElement)146 bool CPDF_StructTree::AddTopLevelNode(
147 CPDF_Dictionary* pDict,
148 const CFX_RetainPtr<CPDF_StructElement>& pElement) {
149 CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
150 if (!pObj)
151 return false;
152
153 if (pObj->IsDictionary()) {
154 if (pObj->GetObjNum() != pDict->GetObjNum())
155 return false;
156 m_Kids[0] = pElement;
157 }
158 if (CPDF_Array* pTopKids = pObj->AsArray()) {
159 bool bSave = false;
160 for (size_t i = 0; i < pTopKids->GetCount(); i++) {
161 CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
162 if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
163 m_Kids[i] = pElement;
164 bSave = true;
165 }
166 }
167 if (!bSave)
168 return false;
169 }
170 return true;
171 }
172
CPDF_StructElement(CPDF_StructTree * pTree,CPDF_StructElement * pParent,CPDF_Dictionary * pDict)173 CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
174 CPDF_StructElement* pParent,
175 CPDF_Dictionary* pDict)
176 : m_pTree(pTree),
177 m_pParent(pParent),
178 m_pDict(pDict),
179 m_Type(pDict->GetStringFor("S")) {
180 if (pTree->m_pRoleMap) {
181 CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type);
182 if (!mapped.IsEmpty())
183 m_Type = mapped;
184 }
185 LoadKids(pDict);
186 }
187
GetTree() const188 IPDF_StructTree* CPDF_StructElement::GetTree() const {
189 return m_pTree;
190 }
191
GetType() const192 const CFX_ByteString& CPDF_StructElement::GetType() const {
193 return m_Type;
194 }
195
GetParent() const196 IPDF_StructElement* CPDF_StructElement::GetParent() const {
197 return m_pParent;
198 }
199
GetDict() const200 CPDF_Dictionary* CPDF_StructElement::GetDict() const {
201 return m_pDict;
202 }
203
CountKids() const204 int CPDF_StructElement::CountKids() const {
205 return pdfium::CollectionSize<int>(m_Kids);
206 }
207
GetKidIfElement(int index) const208 IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const {
209 if (m_Kids[index].m_Type != CPDF_StructKid::Element)
210 return nullptr;
211
212 return m_Kids[index].m_pElement.Get();
213 }
214
~CPDF_StructElement()215 CPDF_StructElement::~CPDF_StructElement() {}
216
LoadKids(CPDF_Dictionary * pDict)217 void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) {
218 CPDF_Object* pObj = pDict->GetObjectFor("Pg");
219 uint32_t PageObjNum = 0;
220 if (CPDF_Reference* pRef = ToReference(pObj))
221 PageObjNum = pRef->GetRefObjNum();
222
223 CPDF_Object* pKids = pDict->GetDirectObjectFor("K");
224 if (!pKids)
225 return;
226
227 m_Kids.clear();
228 if (CPDF_Array* pArray = pKids->AsArray()) {
229 m_Kids.resize(pArray->GetCount());
230 for (uint32_t i = 0; i < pArray->GetCount(); i++) {
231 CPDF_Object* pKid = pArray->GetDirectObjectAt(i);
232 LoadKid(PageObjNum, pKid, &m_Kids[i]);
233 }
234 } else {
235 m_Kids.resize(1);
236 LoadKid(PageObjNum, pKids, &m_Kids[0]);
237 }
238 }
LoadKid(uint32_t PageObjNum,CPDF_Object * pKidObj,CPDF_StructKid * pKid)239 void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
240 CPDF_Object* pKidObj,
241 CPDF_StructKid* pKid) {
242 pKid->m_Type = CPDF_StructKid::Invalid;
243 if (!pKidObj)
244 return;
245
246 if (pKidObj->IsNumber()) {
247 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
248 return;
249 }
250 pKid->m_Type = CPDF_StructKid::PageContent;
251 pKid->m_ContentId = pKidObj->GetInteger();
252 pKid->m_PageObjNum = PageObjNum;
253 return;
254 }
255
256 CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
257 if (!pKidDict)
258 return;
259
260 if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg")))
261 PageObjNum = pRef->GetRefObjNum();
262
263 CFX_ByteString type = pKidDict->GetStringFor("Type");
264 if (type == "MCR") {
265 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
266 return;
267 }
268 pKid->m_Type = CPDF_StructKid::StreamContent;
269 CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm"));
270 pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
271 pKid->m_PageObjNum = PageObjNum;
272 pKid->m_ContentId = pKidDict->GetIntegerFor("MCID");
273 } else if (type == "OBJR") {
274 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
275 return;
276 }
277 pKid->m_Type = CPDF_StructKid::Object;
278 CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj"));
279 pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
280 pKid->m_PageObjNum = PageObjNum;
281 } else {
282 pKid->m_Type = CPDF_StructKid::Element;
283 pKid->m_pDict = pKidDict;
284 if (!m_pTree->m_pPage) {
285 pKid->m_pElement =
286 pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict);
287 } else {
288 pKid->m_pElement = nullptr;
289 }
290 }
291 }
FindAttrDict(CPDF_Object * pAttrs,const CFX_ByteStringC & owner,FX_FLOAT nLevel=0.0F)292 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
293 const CFX_ByteStringC& owner,
294 FX_FLOAT nLevel = 0.0F) {
295 if (nLevel > nMaxRecursion)
296 return nullptr;
297 if (!pAttrs)
298 return nullptr;
299
300 CPDF_Dictionary* pDict = nullptr;
301 if (pAttrs->IsDictionary()) {
302 pDict = pAttrs->AsDictionary();
303 } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
304 pDict = pStream->GetDict();
305 } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
306 for (uint32_t i = 0; i < pArray->GetCount(); i++) {
307 CPDF_Object* pElement = pArray->GetDirectObjectAt(i);
308 pDict = FindAttrDict(pElement, owner, nLevel + 1);
309 if (pDict)
310 return pDict;
311 }
312 }
313 if (pDict && pDict->GetStringFor("O") == owner)
314 return pDict;
315 return nullptr;
316 }
GetAttr(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,bool bInheritable,FX_FLOAT fLevel)317 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
318 const CFX_ByteStringC& name,
319 bool bInheritable,
320 FX_FLOAT fLevel) {
321 if (fLevel > nMaxRecursion) {
322 return nullptr;
323 }
324 if (bInheritable) {
325 CPDF_Object* pAttr = GetAttr(owner, name, false);
326 if (pAttr) {
327 return pAttr;
328 }
329 if (!m_pParent) {
330 return nullptr;
331 }
332 return m_pParent->GetAttr(owner, name, true, fLevel + 1);
333 }
334 CPDF_Object* pA = m_pDict->GetDirectObjectFor("A");
335 if (pA) {
336 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
337 if (pAttrDict) {
338 CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name));
339 if (pAttr) {
340 return pAttr;
341 }
342 }
343 }
344 CPDF_Object* pC = m_pDict->GetDirectObjectFor("C");
345 if (!pC)
346 return nullptr;
347
348 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap");
349 if (!pClassMap)
350 return nullptr;
351
352 if (CPDF_Array* pArray = pC->AsArray()) {
353 for (uint32_t i = 0; i < pArray->GetCount(); i++) {
354 CFX_ByteString class_name = pArray->GetStringAt(i);
355 CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
356 if (pClassDict && pClassDict->GetStringFor("O") == owner)
357 return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
358 }
359 return nullptr;
360 }
361 CFX_ByteString class_name = pC->GetString();
362 CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
363 if (pClassDict && pClassDict->GetStringFor("O") == owner)
364 return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
365 return nullptr;
366 }
GetAttr(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,bool bInheritable,int subindex)367 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
368 const CFX_ByteStringC& name,
369 bool bInheritable,
370 int subindex) {
371 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
372 CPDF_Array* pArray = ToArray(pAttr);
373 if (!pArray || subindex == -1)
374 return pAttr;
375
376 if (subindex >= static_cast<int>(pArray->GetCount()))
377 return pAttr;
378 return pArray->GetDirectObjectAt(subindex);
379 }
GetName(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,const CFX_ByteStringC & default_value,bool bInheritable,int subindex)380 CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner,
381 const CFX_ByteStringC& name,
382 const CFX_ByteStringC& default_value,
383 bool bInheritable,
384 int subindex) {
385 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
386 if (ToName(pAttr))
387 return pAttr->GetString();
388 return CFX_ByteString(default_value);
389 }
390
GetColor(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,FX_ARGB default_value,bool bInheritable,int subindex)391 FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner,
392 const CFX_ByteStringC& name,
393 FX_ARGB default_value,
394 bool bInheritable,
395 int subindex) {
396 CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex));
397 if (!pArray)
398 return default_value;
399 return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) |
400 ((int)(pArray->GetNumberAt(1) * 255) << 8) |
401 (int)(pArray->GetNumberAt(2) * 255);
402 }
GetNumber(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,FX_FLOAT default_value,bool bInheritable,int subindex)403 FX_FLOAT CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner,
404 const CFX_ByteStringC& name,
405 FX_FLOAT default_value,
406 bool bInheritable,
407 int subindex) {
408 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
409 return ToNumber(pAttr) ? pAttr->GetNumber() : default_value;
410 }
GetInteger(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,int default_value,bool bInheritable,int subindex)411 int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner,
412 const CFX_ByteStringC& name,
413 int default_value,
414 bool bInheritable,
415 int subindex) {
416 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
417 return ToNumber(pAttr) ? pAttr->GetInteger() : default_value;
418 }
419