// Copyright 2017 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fpdfdoc/cpdf_structtree.h" #include #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfdoc/cpdf_numbertree.h" #include "core/fpdfdoc/cpdf_structelement.h" #include "core/fxcrt/stl_util.h" namespace { bool IsTagged(const CPDF_Document* pDoc) { RetainPtr pMarkInfo = pDoc->GetRoot()->GetDictFor("MarkInfo"); return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); } } // namespace // static std::unique_ptr CPDF_StructTree::LoadPage( const CPDF_Document* pDoc, RetainPtr pPageDict) { if (!IsTagged(pDoc)) return nullptr; auto pTree = std::make_unique(pDoc); pTree->LoadPageTree(std::move(pPageDict)); return pTree; } CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {} CPDF_StructTree::~CPDF_StructTree() = default; ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const { if (m_pRoleMap) { ByteString mapped = m_pRoleMap->GetNameFor(type); if (!mapped.IsEmpty()) return mapped; } return type; } void CPDF_StructTree::LoadPageTree(RetainPtr pPageDict) { m_pPage = std::move(pPageDict); if (!m_pTreeRoot) return; RetainPtr pKids = m_pTreeRoot->GetDirectObjectFor("K"); if (!pKids) return; uint32_t dwKids = 0; if (pKids->IsDictionary()) dwKids = 1; else if (const CPDF_Array* pArray = pKids->AsArray()) dwKids = fxcrt::CollectionSize(*pArray); else return; m_Kids.clear(); m_Kids.resize(dwKids); RetainPtr pParentTree = m_pTreeRoot->GetDictFor("ParentTree"); if (!pParentTree) return; CPDF_NumberTree parent_tree(std::move(pParentTree)); int parents_id = m_pPage->GetIntegerFor("StructParents", -1); if (parents_id < 0) return; RetainPtr pParentArray = ToArray(parent_tree.LookupValue(parents_id)); if (!pParentArray) return; StructElementMap element_map; for (size_t i = 0; i < pParentArray->size(); i++) { RetainPtr pParent = pParentArray->GetDictAt(i); if (pParent) AddPageNode(std::move(pParent), &element_map, 0); } } RetainPtr CPDF_StructTree::AddPageNode( RetainPtr pDict, StructElementMap* map, int nLevel) { static constexpr int kStructTreeMaxRecursion = 32; if (nLevel > kStructTreeMaxRecursion) return nullptr; auto it = map->find(pDict); if (it != map->end()) return it->second; RetainPtr key(pDict); auto pElement = pdfium::MakeRetain(this, pDict); (*map)[key] = pElement; RetainPtr pParent = pDict->GetDictFor("P"); if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") { if (!AddTopLevelNode(pDict, pElement)) map->erase(key); return pElement; } RetainPtr pParentElement = AddPageNode(std::move(pParent), map, nLevel + 1); if (!pParentElement) return pElement; if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get())) map->erase(key); pElement->SetParent(pParentElement.Get()); return pElement; } bool CPDF_StructTree::AddTopLevelNode( const CPDF_Dictionary* pDict, const RetainPtr& pElement) { RetainPtr pObj = m_pTreeRoot->GetDirectObjectFor("K"); if (!pObj) return false; if (pObj->IsDictionary()) { if (pObj->GetObjNum() != pDict->GetObjNum()) return false; m_Kids[0] = pElement; } const CPDF_Array* pTopKids = pObj->AsArray(); if (!pTopKids) return true; bool bSave = false; for (size_t i = 0; i < pTopKids->size(); i++) { RetainPtr pKidRef = ToReference(pTopKids->GetObjectAt(i)); if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { m_Kids[i] = pElement; bSave = true; } } return bSave; }