1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../include/fpdfapi/fpdf_parser.h"
8 #include "../../include/fpdfapi/fpdf_page.h"
9 #include "../../include/fpdfdoc/fpdf_tagged.h"
10 #include "tagged_int.h"
// Deepest nesting level accepted by the recursive helpers in this file
// (AddPageNode, FindAttrDict, GetAttr); a call beyond it gives up and
// returns NULL.
const int nMaxRecursion = 32;
IsTagged(const CPDF_Document * pDoc)12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
13 {
14 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
16 return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
17 }
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
19 {
20 if (!IsTagged(pDoc)) {
21 return NULL;
22 }
23 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
24 if (pTree == NULL) {
25 return NULL;
26 }
27 pTree->LoadPageTree(pPageDict);
28 return pTree;
29 }
LoadDoc(const CPDF_Document * pDoc)30 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
31 {
32 if (!IsTagged(pDoc)) {
33 return NULL;
34 }
35 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
36 if (pTree == NULL) {
37 return NULL;
38 }
39 pTree->LoadDocTree();
40 return pTree;
41 }
CPDF_StructTreeImpl(const CPDF_Document * pDoc)42 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
43 {
44 CPDF_Dictionary* pCatalog = pDoc->GetRoot();
45 m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
46 if (m_pTreeRoot == NULL) {
47 return;
48 }
49 m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
50 }
~CPDF_StructTreeImpl()51 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
52 {
53 for (int i = 0; i < m_Kids.GetSize(); i ++)
54 if (m_Kids[i]) {
55 m_Kids[i]->Release();
56 }
57 }
LoadDocTree()58 void CPDF_StructTreeImpl::LoadDocTree()
59 {
60 m_pPage = NULL;
61 if (m_pTreeRoot == NULL) {
62 return;
63 }
64 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
65 if (pKids == NULL) {
66 return;
67 }
68 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
69 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
70 if (pStructElementImpl == NULL) {
71 return;
72 }
73 m_Kids.Add(pStructElementImpl);
74 return;
75 }
76 if (pKids->GetType() != PDFOBJ_ARRAY) {
77 return;
78 }
79 CPDF_Array* pArray = (CPDF_Array*)pKids;
80 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
81 CPDF_Dictionary* pKid = pArray->GetDict(i);
82 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, pKid);
83 if (pStructElementImpl == NULL) {
84 return;
85 }
86 m_Kids.Add(pStructElementImpl);
87 }
88 }
LoadPageTree(const CPDF_Dictionary * pPageDict)89 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
90 {
91 m_pPage = pPageDict;
92 if (m_pTreeRoot == NULL) {
93 return;
94 }
95 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
96 if (pKids == NULL) {
97 return;
98 }
99 FX_DWORD dwKids = 0;
100 if (pKids->GetType() == PDFOBJ_DICTIONARY) {
101 dwKids = 1;
102 } else if (pKids->GetType() == PDFOBJ_ARRAY) {
103 dwKids = ((CPDF_Array*)pKids)->GetCount();
104 } else {
105 return;
106 }
107 FX_DWORD i;
108 m_Kids.SetSize(dwKids);
109 for (i = 0; i < dwKids; i ++) {
110 m_Kids[i] = NULL;
111 }
112 CFX_MapPtrToPtr element_map;
113 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
114 if (pParentTree == NULL) {
115 return;
116 }
117 CPDF_NumberTree parent_tree(pParentTree);
118 int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
119 if (parents_id >= 0) {
120 CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
121 if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
122 return;
123 }
124 CPDF_Array* pParentArray = (CPDF_Array*)pParents;
125 for (i = 0; i < pParentArray->GetCount(); i ++) {
126 CPDF_Dictionary* pParent = pParentArray->GetDict(i);
127 if (pParent == NULL) {
128 continue;
129 }
130 AddPageNode(pParent, element_map);
131 }
132 }
133 }
AddPageNode(CPDF_Dictionary * pDict,CFX_MapPtrToPtr & map,int nLevel)134 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
135 {
136 if (nLevel > nMaxRecursion) {
137 return NULL;
138 }
139 CPDF_StructElementImpl* pElement = NULL;
140 if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
141 return pElement;
142 }
143 pElement = FX_NEW CPDF_StructElementImpl(this, NULL, pDict);
144 if (pElement == NULL) {
145 return NULL;
146 }
147 map.SetAt(pDict, pElement);
148 CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
149 if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
150 if (!AddTopLevelNode(pDict, pElement)) {
151 pElement->Release();
152 map.RemoveKey(pDict);
153 }
154 } else {
155 CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
156 FX_BOOL bSave = FALSE;
157 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
158 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
159 continue;
160 }
161 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
162 continue;
163 }
164 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
165 bSave = TRUE;
166 }
167 if (!bSave) {
168 pElement->Release();
169 map.RemoveKey(pDict);
170 }
171 }
172 return pElement;
173 }
AddTopLevelNode(CPDF_Dictionary * pDict,CPDF_StructElementImpl * pElement)174 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
175 {
176 CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
177 if (!pObj) {
178 return FALSE;
179 }
180 if (pObj->GetType() == PDFOBJ_DICTIONARY) {
181 if (pObj->GetObjNum() == pDict->GetObjNum()) {
182 if (m_Kids[0]) {
183 m_Kids[0]->Release();
184 }
185 m_Kids[0] = pElement->Retain();
186 } else {
187 return FALSE;
188 }
189 }
190 if (pObj->GetType() == PDFOBJ_ARRAY) {
191 CPDF_Array* pTopKids = (CPDF_Array*)pObj;
192 FX_DWORD i;
193 FX_BOOL bSave = FALSE;
194 for (i = 0; i < pTopKids->GetCount(); i ++) {
195 CPDF_Reference* pKidRef = (CPDF_Reference*)pTopKids->GetElement(i);
196 if (pKidRef->GetType() != PDFOBJ_REFERENCE || pKidRef->GetRefObjNum() != pDict->GetObjNum()) {
197 continue;
198 }
199 if (m_Kids[i]) {
200 m_Kids[i]->Release();
201 }
202 m_Kids[i] = pElement->Retain();
203 bSave = TRUE;
204 }
205 if (!bSave) {
206 return FALSE;
207 }
208 }
209 return TRUE;
210 }
CPDF_StructElementImpl(CPDF_StructTreeImpl * pTree,CPDF_StructElementImpl * pParent,CPDF_Dictionary * pDict)211 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
212 : m_RefCount(0)
213 {
214 m_pTree = pTree;
215 m_pDict = pDict;
216 m_Type = pDict->GetString(FX_BSTRC("S"));
217 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
218 if (!mapped.IsEmpty()) {
219 m_Type = mapped;
220 }
221 m_pParent = pParent;
222 LoadKids(pDict);
223 }
~CPDF_StructElementImpl()224 CPDF_StructElementImpl::~CPDF_StructElementImpl()
225 {
226 for (int i = 0; i < m_Kids.GetSize(); i ++) {
227 if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
228 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
229 }
230 }
231 }
Retain()232 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
233 {
234 m_RefCount++;
235 return this;
236 }
Release()237 void CPDF_StructElementImpl::Release()
238 {
239 if(--m_RefCount < 1) {
240 delete this;
241 }
242 }
LoadKids(CPDF_Dictionary * pDict)243 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
244 {
245 CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
246 FX_DWORD PageObjNum = 0;
247 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
248 PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
249 }
250 CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
251 if (pKids == NULL) {
252 return;
253 }
254 if (pKids->GetType() == PDFOBJ_ARRAY) {
255 CPDF_Array* pArray = (CPDF_Array*)pKids;
256 m_Kids.SetSize(pArray->GetCount());
257 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
258 CPDF_Object* pKid = pArray->GetElementValue(i);
259 LoadKid(PageObjNum, pKid, &m_Kids[i]);
260 }
261 } else {
262 m_Kids.SetSize(1);
263 LoadKid(PageObjNum, pKids, &m_Kids[0]);
264 }
265 }
LoadKid(FX_DWORD PageObjNum,CPDF_Object * pKidObj,CPDF_StructKid * pKid)266 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
267 {
268 pKid->m_Type = CPDF_StructKid::Invalid;
269 if (pKidObj == NULL) {
270 return;
271 }
272 if (pKidObj->GetType() == PDFOBJ_NUMBER) {
273 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
274 return;
275 }
276 pKid->m_Type = CPDF_StructKid::PageContent;
277 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
278 pKid->m_PageContent.m_PageObjNum = PageObjNum;
279 return;
280 }
281 if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
282 return;
283 }
284 CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
285 CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
286 if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
287 PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
288 }
289 CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
290 if (type == FX_BSTRC("MCR")) {
291 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
292 return;
293 }
294 pKid->m_Type = CPDF_StructKid::StreamContent;
295 CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
296 if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
297 pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
298 } else {
299 pKid->m_StreamContent.m_RefObjNum = 0;
300 }
301 pKid->m_StreamContent.m_PageObjNum = PageObjNum;
302 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
303 } else if (type == FX_BSTRC("OBJR")) {
304 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
305 return;
306 }
307 pKid->m_Type = CPDF_StructKid::Object;
308 CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
309 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
310 pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
311 } else {
312 pKid->m_Object.m_RefObjNum = 0;
313 }
314 pKid->m_Object.m_PageObjNum = PageObjNum;
315 } else {
316 pKid->m_Type = CPDF_StructKid::Element;
317 pKid->m_Element.m_pDict = pKidDict;
318 if (m_pTree->m_pPage == NULL) {
319 pKid->m_Element.m_pElement = FX_NEW CPDF_StructElementImpl(m_pTree, this, pKidDict);
320 } else {
321 pKid->m_Element.m_pElement = NULL;
322 }
323 }
324 }
FindAttrDict(CPDF_Object * pAttrs,FX_BSTR owner,FX_FLOAT nLevel=0.0F)325 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
326 {
327 if (nLevel > nMaxRecursion) {
328 return NULL;
329 }
330 if (pAttrs == NULL) {
331 return NULL;
332 }
333 CPDF_Dictionary* pDict = NULL;
334 if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
335 pDict = (CPDF_Dictionary*)pAttrs;
336 } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
337 pDict = ((CPDF_Stream*)pAttrs)->GetDict();
338 } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
339 CPDF_Array* pArray = (CPDF_Array*)pAttrs;
340 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
341 CPDF_Object* pElement = pArray->GetElementValue(i);
342 pDict = FindAttrDict(pElement, owner, nLevel + 1);
343 if (pDict) {
344 return pDict;
345 }
346 }
347 }
348 if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
349 return pDict;
350 }
351 return NULL;
352 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,FX_FLOAT fLevel)353 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
354 {
355 if (fLevel > nMaxRecursion) {
356 return NULL;
357 }
358 if (bInheritable) {
359 CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
360 if (pAttr) {
361 return pAttr;
362 }
363 if (m_pParent == NULL) {
364 return NULL;
365 }
366 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
367 }
368 CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
369 if (pA) {
370 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
371 if (pAttrDict) {
372 CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
373 if (pAttr) {
374 return pAttr;
375 }
376 }
377 }
378 CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
379 if (pC == NULL) {
380 return NULL;
381 }
382 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
383 if (pClassMap == NULL) {
384 return NULL;
385 }
386 if (pC->GetType() == PDFOBJ_ARRAY) {
387 CPDF_Array* pArray = (CPDF_Array*)pC;
388 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
389 CFX_ByteString class_name = pArray->GetString(i);
390 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
391 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
392 return pClassDict->GetElementValue(name);
393 }
394 }
395 return NULL;
396 }
397 CFX_ByteString class_name = pC->GetString();
398 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
399 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
400 return pClassDict->GetElementValue(name);
401 }
402 return NULL;
403 }
GetAttr(FX_BSTR owner,FX_BSTR name,FX_BOOL bInheritable,int subindex)404 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
405 {
406 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
407 if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
408 return pAttr;
409 }
410 CPDF_Array* pArray = (CPDF_Array*)pAttr;
411 if (subindex >= (int)pArray->GetCount()) {
412 return pAttr;
413 }
414 return pArray->GetElementValue(subindex);
415 }
GetName(FX_BSTR owner,FX_BSTR name,FX_BSTR default_value,FX_BOOL bInheritable,int subindex)416 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
417 {
418 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
419 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
420 return default_value;
421 }
422 return pAttr->GetString();
423 }
GetColor(FX_BSTR owner,FX_BSTR name,FX_ARGB default_value,FX_BOOL bInheritable,int subindex)424 FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
425 {
426 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
427 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
428 return default_value;
429 }
430 CPDF_Array* pArray = (CPDF_Array*)pAttr;
431 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
432 }
GetNumber(FX_BSTR owner,FX_BSTR name,FX_FLOAT default_value,FX_BOOL bInheritable,int subindex)433 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
434 {
435 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
436 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
437 return default_value;
438 }
439 return pAttr->GetNumber();
440 }
GetInteger(FX_BSTR owner,FX_BSTR name,int default_value,FX_BOOL bInheritable,int subindex)441 int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
442 {
443 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
444 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
445 return default_value;
446 }
447 return pAttr->GetInteger();
448 }
449