1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/cpdf_structelement.h"
8
9 #include <utility>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_name.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_object.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfdoc/cpdf_structtree.h"
19 #include "core/fxcrt/check.h"
20
21 CPDF_StructElement::Kid::Kid() = default;
22
23 CPDF_StructElement::Kid::Kid(const Kid& that) = default;
24
25 CPDF_StructElement::Kid::~Kid() = default;
26
CPDF_StructElement(const CPDF_StructTree * pTree,RetainPtr<const CPDF_Dictionary> pDict)27 CPDF_StructElement::CPDF_StructElement(const CPDF_StructTree* pTree,
28 RetainPtr<const CPDF_Dictionary> pDict)
29 : m_pTree(pTree),
30 m_pDict(std::move(pDict)),
31 m_Type(m_pTree->GetRoleMapNameFor(m_pDict->GetNameFor("S"))) {
32 LoadKids();
33 }
34
~CPDF_StructElement()35 CPDF_StructElement::~CPDF_StructElement() {
36 for (auto& kid : m_Kids) {
37 if (kid.m_Type == Kid::kElement && kid.m_pElement) {
38 kid.m_pElement->SetParent(nullptr);
39 }
40 }
41 }
42
GetObjType() const43 ByteString CPDF_StructElement::GetObjType() const {
44 return m_pDict->GetByteStringFor("Type");
45 }
46
GetAltText() const47 WideString CPDF_StructElement::GetAltText() const {
48 return m_pDict->GetUnicodeTextFor("Alt");
49 }
50
GetActualText() const51 WideString CPDF_StructElement::GetActualText() const {
52 return m_pDict->GetUnicodeTextFor("ActualText");
53 }
54
GetTitle() const55 WideString CPDF_StructElement::GetTitle() const {
56 return m_pDict->GetUnicodeTextFor("T");
57 }
58
GetID() const59 std::optional<WideString> CPDF_StructElement::GetID() const {
60 RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("ID");
61 if (!obj || !obj->IsString())
62 return std::nullopt;
63 return obj->GetUnicodeText();
64 }
65
GetLang() const66 std::optional<WideString> CPDF_StructElement::GetLang() const {
67 RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("Lang");
68 if (!obj || !obj->IsString())
69 return std::nullopt;
70 return obj->GetUnicodeText();
71 }
72
GetA() const73 RetainPtr<const CPDF_Object> CPDF_StructElement::GetA() const {
74 return m_pDict->GetObjectFor("A");
75 }
76
GetK() const77 RetainPtr<const CPDF_Object> CPDF_StructElement::GetK() const {
78 return m_pDict->GetObjectFor("K");
79 }
80
CountKids() const81 size_t CPDF_StructElement::CountKids() const {
82 return m_Kids.size();
83 }
84
GetKidIfElement(size_t index) const85 CPDF_StructElement* CPDF_StructElement::GetKidIfElement(size_t index) const {
86 return m_Kids[index].m_Type == Kid::kElement ? m_Kids[index].m_pElement.Get()
87 : nullptr;
88 }
89
GetKidContentId(size_t index) const90 int CPDF_StructElement::GetKidContentId(size_t index) const {
91 return m_Kids[index].m_Type == Kid::kStreamContent ||
92 m_Kids[index].m_Type == Kid::kPageContent
93 ? m_Kids[index].m_ContentId
94 : -1;
95 }
96
UpdateKidIfElement(const CPDF_Dictionary * pDict,CPDF_StructElement * pElement)97 bool CPDF_StructElement::UpdateKidIfElement(const CPDF_Dictionary* pDict,
98 CPDF_StructElement* pElement) {
99 bool bSave = false;
100 for (auto& kid : m_Kids) {
101 if (kid.m_Type == Kid::kElement && kid.m_pDict == pDict) {
102 kid.m_pElement.Reset(pElement);
103 bSave = true;
104 }
105 }
106 return bSave;
107 }
108
LoadKids()109 void CPDF_StructElement::LoadKids() {
110 RetainPtr<const CPDF_Object> pObj = m_pDict->GetObjectFor("Pg");
111 const CPDF_Reference* pRef = ToReference(pObj.Get());
112 const uint32_t page_obj_num = pRef ? pRef->GetRefObjNum() : 0;
113 RetainPtr<const CPDF_Object> pKids = m_pDict->GetDirectObjectFor("K");
114 if (!pKids)
115 return;
116
117 DCHECK(m_Kids.empty());
118 if (const CPDF_Array* pArray = pKids->AsArray()) {
119 m_Kids.resize(pArray->size());
120 for (size_t i = 0; i < pArray->size(); ++i) {
121 LoadKid(page_obj_num, pArray->GetDirectObjectAt(i), m_Kids[i]);
122 }
123 return;
124 }
125
126 m_Kids.resize(1);
127 LoadKid(page_obj_num, std::move(pKids), m_Kids[0]);
128 }
129
LoadKid(uint32_t page_obj_num,RetainPtr<const CPDF_Object> pKidObj,Kid & kid)130 void CPDF_StructElement::LoadKid(uint32_t page_obj_num,
131 RetainPtr<const CPDF_Object> pKidObj,
132 Kid& kid) {
133 if (!pKidObj)
134 return;
135
136 if (pKidObj->IsNumber()) {
137 if (m_pTree->GetPageObjNum() != page_obj_num) {
138 return;
139 }
140
141 kid.m_Type = Kid::kPageContent;
142 kid.m_ContentId = pKidObj->GetInteger();
143 kid.m_PageObjNum = page_obj_num;
144 return;
145 }
146
147 const CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
148 if (!pKidDict)
149 return;
150
151 if (RetainPtr<const CPDF_Reference> pRef =
152 ToReference(pKidDict->GetObjectFor("Pg"))) {
153 page_obj_num = pRef->GetRefObjNum();
154 }
155 ByteString type = pKidDict->GetNameFor("Type");
156 if ((type == "MCR" || type == "OBJR") &&
157 m_pTree->GetPageObjNum() != page_obj_num) {
158 return;
159 }
160
161 if (type == "MCR") {
162 kid.m_Type = Kid::kStreamContent;
163 RetainPtr<const CPDF_Reference> pRef =
164 ToReference(pKidDict->GetObjectFor("Stm"));
165 kid.m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
166 kid.m_PageObjNum = page_obj_num;
167 kid.m_ContentId = pKidDict->GetIntegerFor("MCID");
168 return;
169 }
170
171 if (type == "OBJR") {
172 kid.m_Type = Kid::kObject;
173 RetainPtr<const CPDF_Reference> pObj =
174 ToReference(pKidDict->GetObjectFor("Obj"));
175 kid.m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
176 kid.m_PageObjNum = page_obj_num;
177 return;
178 }
179
180 kid.m_Type = Kid::kElement;
181 kid.m_pDict.Reset(pKidDict);
182 }
183