• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include <map>
8 #include <memory>
9 #include <utility>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_document.h"
14 #include "core/fpdfapi/parser/cpdf_name.h"
15 #include "core/fpdfapi/parser/cpdf_number.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfdoc/cpdf_numbertree.h"
19 #include "core/fpdfdoc/fpdf_tagged.h"
20 #include "core/fpdfdoc/tagged_int.h"
21 #include "third_party/base/ptr_util.h"
22 
23 namespace {
24 
// Depth limit compared against the recursion level in AddPageNode(),
// FindAttrDict() and GetAttr(); deeper recursion is abandoned.
constexpr int nMaxRecursion = 32;
26 
IsTagged(const CPDF_Document * pDoc)27 bool IsTagged(const CPDF_Document* pDoc) {
28   CPDF_Dictionary* pCatalog = pDoc->GetRoot();
29   CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
30   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
31 }
32 
33 }  // namespace
34 
CPDF_StructKid()35 CPDF_StructKid::CPDF_StructKid()
36     : m_Type(Invalid),
37       m_pDict(nullptr),
38       m_PageObjNum(0),
39       m_RefObjNum(0),
40       m_ContentId(0) {}
41 
42 CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default;
43 
~CPDF_StructKid()44 CPDF_StructKid::~CPDF_StructKid() {}
45 
46 // static
LoadPage(const CPDF_Document * pDoc,const CPDF_Dictionary * pPageDict)47 std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage(
48     const CPDF_Document* pDoc,
49     const CPDF_Dictionary* pPageDict) {
50   if (!IsTagged(pDoc))
51     return nullptr;
52 
53   auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
54   pTree->LoadPageTree(pPageDict);
55   return std::move(pTree);
56 }
57 
CPDF_StructTree(const CPDF_Document * pDoc)58 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
59     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
60       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr),
61       m_pPage(nullptr) {}
62 
~CPDF_StructTree()63 CPDF_StructTree::~CPDF_StructTree() {}
64 
CountTopElements() const65 int CPDF_StructTree::CountTopElements() const {
66   return pdfium::CollectionSize<int>(m_Kids);
67 }
68 
GetTopElement(int i) const69 IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const {
70   return m_Kids[i].Get();
71 }
72 
LoadPageTree(const CPDF_Dictionary * pPageDict)73 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
74   m_pPage = pPageDict;
75   if (!m_pTreeRoot)
76     return;
77 
78   CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
79   if (!pKids)
80     return;
81 
82   uint32_t dwKids = 0;
83   if (pKids->IsDictionary())
84     dwKids = 1;
85   else if (CPDF_Array* pArray = pKids->AsArray())
86     dwKids = pArray->GetCount();
87   else
88     return;
89 
90   m_Kids.clear();
91   m_Kids.resize(dwKids);
92   CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
93   if (!pParentTree)
94     return;
95 
96   CPDF_NumberTree parent_tree(pParentTree);
97   int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
98   if (parents_id < 0)
99     return;
100 
101   CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
102   if (!pParentArray)
103     return;
104 
105   std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map;
106   for (size_t i = 0; i < pParentArray->GetCount(); i++) {
107     if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
108       AddPageNode(pParent, &element_map);
109   }
110 }
111 
AddPageNode(CPDF_Dictionary * pDict,std::map<CPDF_Dictionary *,CFX_RetainPtr<CPDF_StructElement>> * map,int nLevel)112 CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
113     CPDF_Dictionary* pDict,
114     std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
115     int nLevel) {
116   if (nLevel > nMaxRecursion)
117     return nullptr;
118 
119   auto it = map->find(pDict);
120   if (it != map->end())
121     return it->second;
122 
123   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
124   (*map)[pDict] = pElement;
125   CPDF_Dictionary* pParent = pDict->GetDictFor("P");
126   if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
127     if (!AddTopLevelNode(pDict, pElement))
128       map->erase(pDict);
129     return pElement;
130   }
131 
132   CFX_RetainPtr<CPDF_StructElement> pParentElement =
133       AddPageNode(pParent, map, nLevel + 1);
134   bool bSave = false;
135   for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
136     if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
137       kid.m_pElement = pElement;
138       bSave = true;
139     }
140   }
141   if (!bSave)
142     map->erase(pDict);
143   return pElement;
144 }
145 
AddTopLevelNode(CPDF_Dictionary * pDict,const CFX_RetainPtr<CPDF_StructElement> & pElement)146 bool CPDF_StructTree::AddTopLevelNode(
147     CPDF_Dictionary* pDict,
148     const CFX_RetainPtr<CPDF_StructElement>& pElement) {
149   CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
150   if (!pObj)
151     return false;
152 
153   if (pObj->IsDictionary()) {
154     if (pObj->GetObjNum() != pDict->GetObjNum())
155       return false;
156     m_Kids[0] = pElement;
157   }
158   if (CPDF_Array* pTopKids = pObj->AsArray()) {
159     bool bSave = false;
160     for (size_t i = 0; i < pTopKids->GetCount(); i++) {
161       CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
162       if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
163         m_Kids[i] = pElement;
164         bSave = true;
165       }
166     }
167     if (!bSave)
168       return false;
169   }
170   return true;
171 }
172 
CPDF_StructElement(CPDF_StructTree * pTree,CPDF_StructElement * pParent,CPDF_Dictionary * pDict)173 CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
174                                        CPDF_StructElement* pParent,
175                                        CPDF_Dictionary* pDict)
176     : m_pTree(pTree),
177       m_pParent(pParent),
178       m_pDict(pDict),
179       m_Type(pDict->GetStringFor("S")) {
180   if (pTree->m_pRoleMap) {
181     CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type);
182     if (!mapped.IsEmpty())
183       m_Type = mapped;
184   }
185   LoadKids(pDict);
186 }
187 
GetTree() const188 IPDF_StructTree* CPDF_StructElement::GetTree() const {
189   return m_pTree;
190 }
191 
GetType() const192 const CFX_ByteString& CPDF_StructElement::GetType() const {
193   return m_Type;
194 }
195 
GetParent() const196 IPDF_StructElement* CPDF_StructElement::GetParent() const {
197   return m_pParent;
198 }
199 
GetDict() const200 CPDF_Dictionary* CPDF_StructElement::GetDict() const {
201   return m_pDict;
202 }
203 
CountKids() const204 int CPDF_StructElement::CountKids() const {
205   return pdfium::CollectionSize<int>(m_Kids);
206 }
207 
GetKidIfElement(int index) const208 IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const {
209   if (m_Kids[index].m_Type != CPDF_StructKid::Element)
210     return nullptr;
211 
212   return m_Kids[index].m_pElement.Get();
213 }
214 
~CPDF_StructElement()215 CPDF_StructElement::~CPDF_StructElement() {}
216 
LoadKids(CPDF_Dictionary * pDict)217 void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) {
218   CPDF_Object* pObj = pDict->GetObjectFor("Pg");
219   uint32_t PageObjNum = 0;
220   if (CPDF_Reference* pRef = ToReference(pObj))
221     PageObjNum = pRef->GetRefObjNum();
222 
223   CPDF_Object* pKids = pDict->GetDirectObjectFor("K");
224   if (!pKids)
225     return;
226 
227   m_Kids.clear();
228   if (CPDF_Array* pArray = pKids->AsArray()) {
229     m_Kids.resize(pArray->GetCount());
230     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
231       CPDF_Object* pKid = pArray->GetDirectObjectAt(i);
232       LoadKid(PageObjNum, pKid, &m_Kids[i]);
233     }
234   } else {
235     m_Kids.resize(1);
236     LoadKid(PageObjNum, pKids, &m_Kids[0]);
237   }
238 }
LoadKid(uint32_t PageObjNum,CPDF_Object * pKidObj,CPDF_StructKid * pKid)239 void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
240                                  CPDF_Object* pKidObj,
241                                  CPDF_StructKid* pKid) {
242   pKid->m_Type = CPDF_StructKid::Invalid;
243   if (!pKidObj)
244     return;
245 
246   if (pKidObj->IsNumber()) {
247     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
248       return;
249     }
250     pKid->m_Type = CPDF_StructKid::PageContent;
251     pKid->m_ContentId = pKidObj->GetInteger();
252     pKid->m_PageObjNum = PageObjNum;
253     return;
254   }
255 
256   CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
257   if (!pKidDict)
258     return;
259 
260   if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg")))
261     PageObjNum = pRef->GetRefObjNum();
262 
263   CFX_ByteString type = pKidDict->GetStringFor("Type");
264   if (type == "MCR") {
265     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
266       return;
267     }
268     pKid->m_Type = CPDF_StructKid::StreamContent;
269     CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm"));
270     pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
271     pKid->m_PageObjNum = PageObjNum;
272     pKid->m_ContentId = pKidDict->GetIntegerFor("MCID");
273   } else if (type == "OBJR") {
274     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
275       return;
276     }
277     pKid->m_Type = CPDF_StructKid::Object;
278     CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj"));
279     pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
280     pKid->m_PageObjNum = PageObjNum;
281   } else {
282     pKid->m_Type = CPDF_StructKid::Element;
283     pKid->m_pDict = pKidDict;
284     if (!m_pTree->m_pPage) {
285       pKid->m_pElement =
286           pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict);
287     } else {
288       pKid->m_pElement = nullptr;
289     }
290   }
291 }
FindAttrDict(CPDF_Object * pAttrs,const CFX_ByteStringC & owner,FX_FLOAT nLevel=0.0F)292 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
293                                      const CFX_ByteStringC& owner,
294                                      FX_FLOAT nLevel = 0.0F) {
295   if (nLevel > nMaxRecursion)
296     return nullptr;
297   if (!pAttrs)
298     return nullptr;
299 
300   CPDF_Dictionary* pDict = nullptr;
301   if (pAttrs->IsDictionary()) {
302     pDict = pAttrs->AsDictionary();
303   } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
304     pDict = pStream->GetDict();
305   } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
306     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
307       CPDF_Object* pElement = pArray->GetDirectObjectAt(i);
308       pDict = FindAttrDict(pElement, owner, nLevel + 1);
309       if (pDict)
310         return pDict;
311     }
312   }
313   if (pDict && pDict->GetStringFor("O") == owner)
314     return pDict;
315   return nullptr;
316 }
GetAttr(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,bool bInheritable,FX_FLOAT fLevel)317 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
318                                          const CFX_ByteStringC& name,
319                                          bool bInheritable,
320                                          FX_FLOAT fLevel) {
321   if (fLevel > nMaxRecursion) {
322     return nullptr;
323   }
324   if (bInheritable) {
325     CPDF_Object* pAttr = GetAttr(owner, name, false);
326     if (pAttr) {
327       return pAttr;
328     }
329     if (!m_pParent) {
330       return nullptr;
331     }
332     return m_pParent->GetAttr(owner, name, true, fLevel + 1);
333   }
334   CPDF_Object* pA = m_pDict->GetDirectObjectFor("A");
335   if (pA) {
336     CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
337     if (pAttrDict) {
338       CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name));
339       if (pAttr) {
340         return pAttr;
341       }
342     }
343   }
344   CPDF_Object* pC = m_pDict->GetDirectObjectFor("C");
345   if (!pC)
346     return nullptr;
347 
348   CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap");
349   if (!pClassMap)
350     return nullptr;
351 
352   if (CPDF_Array* pArray = pC->AsArray()) {
353     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
354       CFX_ByteString class_name = pArray->GetStringAt(i);
355       CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
356       if (pClassDict && pClassDict->GetStringFor("O") == owner)
357         return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
358     }
359     return nullptr;
360   }
361   CFX_ByteString class_name = pC->GetString();
362   CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
363   if (pClassDict && pClassDict->GetStringFor("O") == owner)
364     return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
365   return nullptr;
366 }
GetAttr(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,bool bInheritable,int subindex)367 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
368                                          const CFX_ByteStringC& name,
369                                          bool bInheritable,
370                                          int subindex) {
371   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
372   CPDF_Array* pArray = ToArray(pAttr);
373   if (!pArray || subindex == -1)
374     return pAttr;
375 
376   if (subindex >= static_cast<int>(pArray->GetCount()))
377     return pAttr;
378   return pArray->GetDirectObjectAt(subindex);
379 }
GetName(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,const CFX_ByteStringC & default_value,bool bInheritable,int subindex)380 CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner,
381                                            const CFX_ByteStringC& name,
382                                            const CFX_ByteStringC& default_value,
383                                            bool bInheritable,
384                                            int subindex) {
385   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
386   if (ToName(pAttr))
387     return pAttr->GetString();
388   return CFX_ByteString(default_value);
389 }
390 
GetColor(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,FX_ARGB default_value,bool bInheritable,int subindex)391 FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner,
392                                      const CFX_ByteStringC& name,
393                                      FX_ARGB default_value,
394                                      bool bInheritable,
395                                      int subindex) {
396   CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex));
397   if (!pArray)
398     return default_value;
399   return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) |
400          ((int)(pArray->GetNumberAt(1) * 255) << 8) |
401          (int)(pArray->GetNumberAt(2) * 255);
402 }
GetNumber(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,FX_FLOAT default_value,bool bInheritable,int subindex)403 FX_FLOAT CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner,
404                                        const CFX_ByteStringC& name,
405                                        FX_FLOAT default_value,
406                                        bool bInheritable,
407                                        int subindex) {
408   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
409   return ToNumber(pAttr) ? pAttr->GetNumber() : default_value;
410 }
GetInteger(const CFX_ByteStringC & owner,const CFX_ByteStringC & name,int default_value,bool bInheritable,int subindex)411 int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner,
412                                    const CFX_ByteStringC& name,
413                                    int default_value,
414                                    bool bInheritable,
415                                    int subindex) {
416   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
417   return ToNumber(pAttr) ? pAttr->GetInteger() : default_value;
418 }
419