• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_structtree.h"
8 
9 #include <utility>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_document.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_reference.h"
16 #include "core/fpdfdoc/cpdf_numbertree.h"
17 #include "core/fpdfdoc/cpdf_structelement.h"
18 #include "core/fxcrt/stl_util.h"
19 
20 namespace {
21 
IsTagged(const CPDF_Document * pDoc)22 bool IsTagged(const CPDF_Document* pDoc) {
23   RetainPtr<const CPDF_Dictionary> pMarkInfo =
24       pDoc->GetRoot()->GetDictFor("MarkInfo");
25   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
26 }
27 
28 }  // namespace
29 
30 // static
LoadPage(const CPDF_Document * pDoc,RetainPtr<const CPDF_Dictionary> pPageDict)31 std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
32     const CPDF_Document* pDoc,
33     RetainPtr<const CPDF_Dictionary> pPageDict) {
34   if (!IsTagged(pDoc))
35     return nullptr;
36 
37   auto pTree = std::make_unique<CPDF_StructTree>(pDoc);
38   pTree->LoadPageTree(std::move(pPageDict));
39   return pTree;
40 }
41 
CPDF_StructTree(const CPDF_Document * pDoc)42 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
43     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
44       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {}
45 
46 CPDF_StructTree::~CPDF_StructTree() = default;
47 
// Looks `type` up in the role map. Returns the mapped name when a role map
// exists and yields a non-empty name for `type`; otherwise returns `type`
// unchanged.
ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const {
  if (!m_pRoleMap)
    return type;

  ByteString role_name = m_pRoleMap->GetNameFor(type);
  if (role_name.IsEmpty())
    return type;

  return role_name;
}
56 
LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict)57 void CPDF_StructTree::LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict) {
58   m_pPage = std::move(pPageDict);
59   if (!m_pTreeRoot)
60     return;
61 
62   RetainPtr<const CPDF_Object> pKids = m_pTreeRoot->GetDirectObjectFor("K");
63   if (!pKids)
64     return;
65 
66   uint32_t dwKids = 0;
67   if (pKids->IsDictionary())
68     dwKids = 1;
69   else if (const CPDF_Array* pArray = pKids->AsArray())
70     dwKids = fxcrt::CollectionSize<uint32_t>(*pArray);
71   else
72     return;
73 
74   m_Kids.clear();
75   m_Kids.resize(dwKids);
76 
77   RetainPtr<const CPDF_Dictionary> pParentTree =
78       m_pTreeRoot->GetDictFor("ParentTree");
79   if (!pParentTree)
80     return;
81 
82   CPDF_NumberTree parent_tree(std::move(pParentTree));
83   int parents_id = m_pPage->GetIntegerFor("StructParents", -1);
84   if (parents_id < 0)
85     return;
86 
87   RetainPtr<const CPDF_Array> pParentArray =
88       ToArray(parent_tree.LookupValue(parents_id));
89   if (!pParentArray)
90     return;
91 
92   StructElementMap element_map;
93   for (size_t i = 0; i < pParentArray->size(); i++) {
94     RetainPtr<const CPDF_Dictionary> pParent = pParentArray->GetDictAt(i);
95     if (pParent)
96       AddPageNode(std::move(pParent), &element_map, 0);
97   }
98 }
99 
AddPageNode(RetainPtr<const CPDF_Dictionary> pDict,StructElementMap * map,int nLevel)100 RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
101     RetainPtr<const CPDF_Dictionary> pDict,
102     StructElementMap* map,
103     int nLevel) {
104   static constexpr int kStructTreeMaxRecursion = 32;
105   if (nLevel > kStructTreeMaxRecursion)
106     return nullptr;
107 
108   auto it = map->find(pDict);
109   if (it != map->end())
110     return it->second;
111 
112   RetainPtr<const CPDF_Dictionary> key(pDict);
113   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, pDict);
114   (*map)[key] = pElement;
115   RetainPtr<const CPDF_Dictionary> pParent = pDict->GetDictFor("P");
116   if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") {
117     if (!AddTopLevelNode(pDict, pElement))
118       map->erase(key);
119     return pElement;
120   }
121 
122   RetainPtr<CPDF_StructElement> pParentElement =
123       AddPageNode(std::move(pParent), map, nLevel + 1);
124   if (!pParentElement)
125     return pElement;
126 
127   if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get()))
128     map->erase(key);
129 
130   pElement->SetParent(pParentElement.Get());
131 
132   return pElement;
133 }
134 
AddTopLevelNode(const CPDF_Dictionary * pDict,const RetainPtr<CPDF_StructElement> & pElement)135 bool CPDF_StructTree::AddTopLevelNode(
136     const CPDF_Dictionary* pDict,
137     const RetainPtr<CPDF_StructElement>& pElement) {
138   RetainPtr<const CPDF_Object> pObj = m_pTreeRoot->GetDirectObjectFor("K");
139   if (!pObj)
140     return false;
141 
142   if (pObj->IsDictionary()) {
143     if (pObj->GetObjNum() != pDict->GetObjNum())
144       return false;
145     m_Kids[0] = pElement;
146   }
147 
148   const CPDF_Array* pTopKids = pObj->AsArray();
149   if (!pTopKids)
150     return true;
151 
152   bool bSave = false;
153   for (size_t i = 0; i < pTopKids->size(); i++) {
154     RetainPtr<const CPDF_Reference> pKidRef =
155         ToReference(pTopKids->GetObjectAt(i));
156     if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
157       m_Kids[i] = pElement;
158       bSave = true;
159     }
160   }
161   return bSave;
162 }
163