1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/cpdf_structtree.h"
8
9 #include <utility>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_document.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_reference.h"
16 #include "core/fpdfdoc/cpdf_numbertree.h"
17 #include "core/fpdfdoc/cpdf_structelement.h"
18 #include "core/fxcrt/stl_util.h"
19
20 namespace {
21
IsTagged(const CPDF_Document * pDoc)22 bool IsTagged(const CPDF_Document* pDoc) {
23 RetainPtr<const CPDF_Dictionary> pMarkInfo =
24 pDoc->GetRoot()->GetDictFor("MarkInfo");
25 return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
26 }
27
28 } // namespace
29
30 // static
LoadPage(const CPDF_Document * pDoc,RetainPtr<const CPDF_Dictionary> pPageDict)31 std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
32 const CPDF_Document* pDoc,
33 RetainPtr<const CPDF_Dictionary> pPageDict) {
34 if (!IsTagged(pDoc))
35 return nullptr;
36
37 auto pTree = std::make_unique<CPDF_StructTree>(pDoc);
38 pTree->LoadPageTree(std::move(pPageDict));
39 return pTree;
40 }
41
CPDF_StructTree(const CPDF_Document * pDoc)42 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
43 : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
44 m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {}
45
46 CPDF_StructTree::~CPDF_StructTree() = default;
47
// Translates |type| through the role map. |type| itself is returned when
// there is no role map or when the role map yields an empty name for it.
ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const {
  if (!m_pRoleMap)
    return type;

  ByteString mapped_name = m_pRoleMap->GetNameFor(type);
  if (mapped_name.IsEmpty())
    return type;

  return mapped_name;
}
56
LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict)57 void CPDF_StructTree::LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict) {
58 m_pPage = std::move(pPageDict);
59 if (!m_pTreeRoot)
60 return;
61
62 RetainPtr<const CPDF_Object> pKids = m_pTreeRoot->GetDirectObjectFor("K");
63 if (!pKids)
64 return;
65
66 uint32_t dwKids = 0;
67 if (pKids->IsDictionary())
68 dwKids = 1;
69 else if (const CPDF_Array* pArray = pKids->AsArray())
70 dwKids = fxcrt::CollectionSize<uint32_t>(*pArray);
71 else
72 return;
73
74 m_Kids.clear();
75 m_Kids.resize(dwKids);
76
77 RetainPtr<const CPDF_Dictionary> pParentTree =
78 m_pTreeRoot->GetDictFor("ParentTree");
79 if (!pParentTree)
80 return;
81
82 CPDF_NumberTree parent_tree(std::move(pParentTree));
83 int parents_id = m_pPage->GetIntegerFor("StructParents", -1);
84 if (parents_id < 0)
85 return;
86
87 RetainPtr<const CPDF_Array> pParentArray =
88 ToArray(parent_tree.LookupValue(parents_id));
89 if (!pParentArray)
90 return;
91
92 StructElementMap element_map;
93 for (size_t i = 0; i < pParentArray->size(); i++) {
94 RetainPtr<const CPDF_Dictionary> pParent = pParentArray->GetDictAt(i);
95 if (pParent)
96 AddPageNode(std::move(pParent), &element_map, 0);
97 }
98 }
99
AddPageNode(RetainPtr<const CPDF_Dictionary> pDict,StructElementMap * map,int nLevel)100 RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
101 RetainPtr<const CPDF_Dictionary> pDict,
102 StructElementMap* map,
103 int nLevel) {
104 static constexpr int kStructTreeMaxRecursion = 32;
105 if (nLevel > kStructTreeMaxRecursion)
106 return nullptr;
107
108 auto it = map->find(pDict);
109 if (it != map->end())
110 return it->second;
111
112 RetainPtr<const CPDF_Dictionary> key(pDict);
113 auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, pDict);
114 (*map)[key] = pElement;
115 RetainPtr<const CPDF_Dictionary> pParent = pDict->GetDictFor("P");
116 if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") {
117 if (!AddTopLevelNode(pDict, pElement))
118 map->erase(key);
119 return pElement;
120 }
121
122 RetainPtr<CPDF_StructElement> pParentElement =
123 AddPageNode(std::move(pParent), map, nLevel + 1);
124 if (!pParentElement)
125 return pElement;
126
127 if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get()))
128 map->erase(key);
129
130 pElement->SetParent(pParentElement.Get());
131
132 return pElement;
133 }
134
AddTopLevelNode(const CPDF_Dictionary * pDict,const RetainPtr<CPDF_StructElement> & pElement)135 bool CPDF_StructTree::AddTopLevelNode(
136 const CPDF_Dictionary* pDict,
137 const RetainPtr<CPDF_StructElement>& pElement) {
138 RetainPtr<const CPDF_Object> pObj = m_pTreeRoot->GetDirectObjectFor("K");
139 if (!pObj)
140 return false;
141
142 if (pObj->IsDictionary()) {
143 if (pObj->GetObjNum() != pDict->GetObjNum())
144 return false;
145 m_Kids[0] = pElement;
146 }
147
148 const CPDF_Array* pTopKids = pObj->AsArray();
149 if (!pTopKids)
150 return true;
151
152 bool bSave = false;
153 for (size_t i = 0; i < pTopKids->size(); i++) {
154 RetainPtr<const CPDF_Reference> pKidRef =
155 ToReference(pTopKids->GetObjectAt(i));
156 if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
157 m_Kids[i] = pElement;
158 bSave = true;
159 }
160 }
161 return bSave;
162 }
163