// Copyright 2016 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fpdfdoc/cpdf_nametree.h" #include #include #include #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_string.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" #include "core/fxcrt/check.h" #include "core/fxcrt/ptr_util.h" #include "core/fxcrt/stl_util.h" namespace { constexpr int kNameTreeMaxRecursion = 32; std::pair GetNodeLimitsAndSanitize( CPDF_Array* pLimits) { DCHECK(pLimits); WideString csLeft = pLimits->GetUnicodeTextAt(0); WideString csRight = pLimits->GetUnicodeTextAt(1); // If the lower limit is greater than the upper limit, swap them. if (csLeft.Compare(csRight) > 0) { pLimits->SetNewAt(0, csRight.AsStringView()); pLimits->SetNewAt(1, csLeft.AsStringView()); csLeft = pLimits->GetUnicodeTextAt(0); csRight = pLimits->GetUnicodeTextAt(1); } while (pLimits->size() > 2) pLimits->RemoveAt(pLimits->size() - 1); return {csLeft, csRight}; } // Get the limit arrays that leaf array |pFind| is under in the tree with root // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before // the root. Return true if successful. bool GetNodeAncestorsLimitsInternal(const RetainPtr& pNode, const CPDF_Array* pFind, int nLevel, std::vector* pLimits) { if (nLevel > kNameTreeMaxRecursion) return false; if (pNode->GetArrayFor("Names") == pFind) { pLimits->push_back(pNode->GetMutableArrayFor("Limits").Get()); return true; } RetainPtr pKids = pNode->GetMutableArrayFor("Kids"); if (!pKids) return false; for (size_t i = 0; i < pKids->size(); ++i) { RetainPtr pKid = pKids->GetMutableDictAt(i); if (!pKid) continue; if (GetNodeAncestorsLimitsInternal(pKid, pFind, nLevel + 1, pLimits)) { pLimits->push_back(pNode->GetMutableArrayFor("Limits").Get()); return true; } } return false; } // Wrapper for GetNodeAncestorsLimitsInternal() so callers do not need to know // about the details. std::vector GetNodeAncestorsLimits( const RetainPtr& pNode, const CPDF_Array* pFind) { std::vector results; GetNodeAncestorsLimitsInternal(pNode, pFind, 0, &results); return results; } // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated // if needed, and any ancestors that are now empty will be removed. bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode, const CPDF_Array* pFind, const WideString& csName, int nLevel) { if (nLevel > kNameTreeMaxRecursion) return false; RetainPtr pLimits = pNode->GetMutableArrayFor("Limits"); WideString csLeft; WideString csRight; if (pLimits) std::tie(csLeft, csRight) = GetNodeLimitsAndSanitize(pLimits.Get()); RetainPtr pNames = pNode->GetArrayFor("Names"); if (pNames) { if (pNames != pFind) return false; if (pNames->IsEmpty() || !pLimits) return true; if (csLeft != csName && csRight != csName) return true; // Since |csName| defines |pNode|'s limits, we need to loop through the // names to find the new lower and upper limits. WideString csNewLeft = csRight; WideString csNewRight = csLeft; for (size_t i = 0; i < pNames->size() / 2; ++i) { WideString wsName = pNames->GetUnicodeTextAt(i * 2); if (wsName.Compare(csNewLeft) < 0) csNewLeft = wsName; if (wsName.Compare(csNewRight) > 0) csNewRight = wsName; } pLimits->SetNewAt(0, csNewLeft.AsStringView()); pLimits->SetNewAt(1, csNewRight.AsStringView()); return true; } RetainPtr pKids = pNode->GetMutableArrayFor("Kids"); if (!pKids) return false; // Loop through the kids to find the leaf array |pFind|. for (size_t i = 0; i < pKids->size(); ++i) { RetainPtr pKid = pKids->GetMutableDictAt(i); if (!pKid) continue; if (!UpdateNodesAndLimitsUponDeletion(pKid.Get(), pFind, csName, nLevel + 1)) { continue; } // Remove this child node if it's empty. if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) || (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) { pKids->RemoveAt(i); } if (pKids->IsEmpty() || !pLimits) return true; if (csLeft != csName && csRight != csName) return true; // Since |csName| defines |pNode|'s limits, we need to loop through the // kids to find the new lower and upper limits. WideString csNewLeft = csRight; WideString csNewRight = csLeft; for (size_t j = 0; j < pKids->size(); ++j) { RetainPtr pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits"); DCHECK(pKidLimits); if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0) csNewLeft = pKidLimits->GetUnicodeTextAt(0); if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0) csNewRight = pKidLimits->GetUnicodeTextAt(1); } pLimits->SetNewAt(0, csNewLeft.AsStringView()); pLimits->SetNewAt(1, csNewRight.AsStringView()); return true; } return false; } bool IsTraversedObject(const CPDF_Object* obj, std::set* seen_obj_nums) { uint32_t obj_num = obj->GetObjNum(); if (!obj_num) return false; bool inserted = seen_obj_nums->insert(obj_num).second; return !inserted; } bool IsArrayWithTraversedObject(const CPDF_Array* array, std::set* seen_obj_nums) { if (IsTraversedObject(array, seen_obj_nums)) return true; CPDF_ArrayLocker locker(array); for (const auto& item : locker) { if (IsTraversedObject(item.Get(), seen_obj_nums)) return true; } return false; } // Search for |csName| in the tree with root |pNode|. If successful, return the // value that |csName| points to; |nIndex| will be the index of |csName|, // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex| // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind| // will be the leaf array that |csName| should be added to, and |pFindIndex| // will be the index that it should be added at. RetainPtr SearchNameNodeByNameInternal( const RetainPtr& pNode, const WideString& csName, int nLevel, size_t* nIndex, RetainPtr* ppFind, int* pFindIndex, std::set* seen_obj_nums) { if (nLevel > kNameTreeMaxRecursion) return nullptr; RetainPtr pLimits = pNode->GetMutableArrayFor("Limits"); RetainPtr pNames = pNode->GetMutableArrayFor("Names"); if (pNames && IsArrayWithTraversedObject(pNames.Get(), seen_obj_nums)) pNames.Reset(); if (pLimits && IsArrayWithTraversedObject(pLimits.Get(), seen_obj_nums)) pLimits.Reset(); if (pLimits) { auto [csLeft, csRight] = GetNodeLimitsAndSanitize(pLimits.Get()); // Skip this node if the name to look for is smaller than its lower limit. if (csName.Compare(csLeft) < 0) return nullptr; // Skip this node if the name to look for is greater than its higher limit, // and the node itself is a leaf node. if (csName.Compare(csRight) > 0 && pNames) { if (ppFind) *ppFind = pNames; if (pFindIndex) *pFindIndex = fxcrt::CollectionSize(*pNames) / 2 - 1; return nullptr; } } // If the node is a leaf node, look for the name in its names array. if (pNames) { size_t dwCount = pNames->size() / 2; for (size_t i = 0; i < dwCount; i++) { WideString csValue = pNames->GetUnicodeTextAt(i * 2); int32_t iCompare = csValue.Compare(csName); if (iCompare > 0) break; if (ppFind) *ppFind = pNames; if (pFindIndex) *pFindIndex = pdfium::checked_cast(i); if (iCompare < 0) continue; *nIndex += i; return pNames->GetDirectObjectAt(i * 2 + 1); } *nIndex += dwCount; return nullptr; } // Search through the node's children. RetainPtr pKids = pNode->GetMutableArrayFor("Kids"); if (!pKids || IsTraversedObject(pKids.Get(), seen_obj_nums)) return nullptr; for (size_t i = 0; i < pKids->size(); i++) { RetainPtr pKid = pKids->GetMutableDictAt(i); if (!pKid || IsTraversedObject(pKid.Get(), seen_obj_nums)) continue; RetainPtr pFound = SearchNameNodeByNameInternal( pKid, csName, nLevel + 1, nIndex, ppFind, pFindIndex, seen_obj_nums); if (pFound) return pFound; } return nullptr; } // Wrapper for SearchNameNodeByNameInternal() so callers do not need to know // about the details. RetainPtr SearchNameNodeByName( const RetainPtr& pNode, const WideString& csName, RetainPtr* ppFind, int* pFindIndex) { size_t nIndex = 0; std::set seen_obj_nums; return SearchNameNodeByNameInternal(pNode, csName, 0, &nIndex, ppFind, pFindIndex, &seen_obj_nums); } struct IndexSearchResult { // For the n-th object in a tree, the key and value. WideString key; RetainPtr value; // The leaf node that holds `key` and `value`. RetainPtr container; // The index for `key` in `container`. Must be even. size_t index; }; // Find the `nTargetPairIndex` node in the tree with root `pNode`. `nLevel` // tracks the recursion level and `nCurPairIndex` tracks the progress towards // `nTargetPairIndex`. std::optional SearchNameNodeByIndexInternal( CPDF_Dictionary* pNode, size_t nTargetPairIndex, int nLevel, size_t* nCurPairIndex) { if (nLevel > kNameTreeMaxRecursion) return std::nullopt; RetainPtr pNames = pNode->GetMutableArrayFor("Names"); if (pNames) { size_t nCount = pNames->size() / 2; if (nTargetPairIndex >= *nCurPairIndex + nCount) { *nCurPairIndex += nCount; return std::nullopt; } size_t index = 2 * (nTargetPairIndex - *nCurPairIndex); RetainPtr value = pNames->GetMutableDirectObjectAt(index + 1); if (!value) return std::nullopt; IndexSearchResult result; result.key = pNames->GetUnicodeTextAt(index); result.value = std::move(value); result.container = std::move(pNames); result.index = index; return result; } RetainPtr pKids = pNode->GetMutableArrayFor("Kids"); if (!pKids) return std::nullopt; for (size_t i = 0; i < pKids->size(); i++) { RetainPtr pKid = pKids->GetMutableDictAt(i); if (!pKid) continue; std::optional result = SearchNameNodeByIndexInternal( pKid.Get(), nTargetPairIndex, nLevel + 1, nCurPairIndex); if (result.has_value()) return result; } return std::nullopt; } // Wrapper for SearchNameNodeByIndexInternal() so callers do not need to know // about the details. std::optional SearchNameNodeByIndex( CPDF_Dictionary* pNode, size_t nTargetPairIndex) { size_t nCurPairIndex = 0; return SearchNameNodeByIndexInternal(pNode, nTargetPairIndex, 0, &nCurPairIndex); } // Get the total number of key-value pairs in the tree with root |pNode|. size_t CountNamesInternal(const CPDF_Dictionary* pNode, int nLevel, std::set& seen) { if (nLevel > kNameTreeMaxRecursion) return 0; const bool inserted = seen.insert(pNode).second; if (!inserted) return 0; RetainPtr pNames = pNode->GetArrayFor("Names"); if (pNames) return pNames->size() / 2; RetainPtr pKids = pNode->GetArrayFor("Kids"); if (!pKids) return 0; size_t nCount = 0; for (size_t i = 0; i < pKids->size(); i++) { RetainPtr pKid = pKids->GetDictAt(i); if (!pKid) continue; nCount += CountNamesInternal(pKid.Get(), nLevel + 1, seen); } return nCount; } RetainPtr GetNamedDestFromObject( RetainPtr obj) { RetainPtr array = ToArray(obj); if (array) return array; RetainPtr dict = ToDictionary(obj); if (dict) return dict->GetArrayFor("D"); return nullptr; } RetainPtr LookupOldStyleNamedDest(CPDF_Document* pDoc, const ByteString& name) { RetainPtr pDests = pDoc->GetRoot()->GetDictFor("Dests"); if (!pDests) return nullptr; return GetNamedDestFromObject(pDests->GetDirectObjectFor(name)); } } // namespace CPDF_NameTree::CPDF_NameTree(RetainPtr pRoot) : m_pRoot(std::move(pRoot)) { DCHECK(m_pRoot); } CPDF_NameTree::~CPDF_NameTree() = default; // static std::unique_ptr CPDF_NameTree::Create( CPDF_Document* pDoc, const ByteString& category) { RetainPtr pRoot = pDoc->GetMutableRoot(); if (!pRoot) return nullptr; RetainPtr pNames = pRoot->GetMutableDictFor("Names"); if (!pNames) return nullptr; RetainPtr pCategory = pNames->GetMutableDictFor(category); if (!pCategory) return nullptr; return pdfium::WrapUnique( new CPDF_NameTree(std::move(pCategory))); // Private ctor. } // static std::unique_ptr CPDF_NameTree::CreateWithRootNameArray( CPDF_Document* pDoc, const ByteString& category) { RetainPtr pRoot = pDoc->GetMutableRoot(); if (!pRoot) return nullptr; // Retrieve the document's Names dictionary; create it if missing. RetainPtr pNames = pRoot->GetMutableDictFor("Names"); if (!pNames) { pNames = pDoc->NewIndirect(); pRoot->SetNewFor("Names", pDoc, pNames->GetObjNum()); } // Create the |category| dictionary if missing. RetainPtr pCategory = pNames->GetMutableDictFor(category); if (!pCategory) { pCategory = pDoc->NewIndirect(); pCategory->SetNewFor("Names"); pNames->SetNewFor(category, pDoc, pCategory->GetObjNum()); } return pdfium::WrapUnique(new CPDF_NameTree(pCategory)); // Private ctor. } // static std::unique_ptr CPDF_NameTree::CreateForTesting( CPDF_Dictionary* pRoot) { return pdfium::WrapUnique( new CPDF_NameTree(pdfium::WrapRetain(pRoot))); // Private ctor. } // static RetainPtr CPDF_NameTree::LookupNamedDest( CPDF_Document* pDoc, const ByteString& name) { RetainPtr dest_array; std::unique_ptr name_tree = Create(pDoc, "Dests"); if (name_tree) dest_array = name_tree->LookupNewStyleNamedDest(name); if (!dest_array) dest_array = LookupOldStyleNamedDest(pDoc, name); return dest_array; } size_t CPDF_NameTree::GetCount() const { std::set seen; return CountNamesInternal(m_pRoot.Get(), 0, seen); } bool CPDF_NameTree::AddValueAndName(RetainPtr pObj, const WideString& name) { RetainPtr pFind; int nFindIndex = -1; // Handle the corner case where the root node is empty. i.e. No kids and no // names. In which case, just insert into it and skip all the searches. RetainPtr pNames = m_pRoot->GetMutableArrayFor("Names"); if (pNames && pNames->IsEmpty() && !m_pRoot->GetArrayFor("Kids")) pFind = pNames; if (!pFind) { // Fail if the tree already contains this name or if the tree is too deep. if (SearchNameNodeByName(m_pRoot, name, &pFind, &nFindIndex)) return false; } // If the returned |pFind| is a nullptr, then |name| is smaller than all // existing entries in the tree, and we did not find a leaf array to place // |name| into. We instead will find the leftmost leaf array in which to place // |name| and |pObj|. if (!pFind) { std::optional result = SearchNameNodeByIndex(m_pRoot.Get(), 0); if (!result.has_value()) { // Give up if that fails too. return false; } pFind = result.value().container; DCHECK(pFind); } // Insert the name and the object into the leaf array found. Note that the // insertion position is right after the key-value pair returned by |index|. size_t nNameIndex = (nFindIndex + 1) * 2; size_t nValueIndex = nNameIndex + 1; pFind->InsertNewAt(nNameIndex, name.AsStringView()); pFind->InsertAt(nValueIndex, std::move(pObj)); // Expand the limits that the newly added name is under, if the name falls // outside of the limits of its leaf array or any arrays above it. std::vector all_limits = GetNodeAncestorsLimits(m_pRoot, pFind.Get()); for (auto* pLimits : all_limits) { if (!pLimits) continue; if (name.Compare(pLimits->GetUnicodeTextAt(0)) < 0) pLimits->SetNewAt(0, name.AsStringView()); if (name.Compare(pLimits->GetUnicodeTextAt(1)) > 0) pLimits->SetNewAt(1, name.AsStringView()); } return true; } bool CPDF_NameTree::DeleteValueAndName(size_t nIndex) { std::optional result = SearchNameNodeByIndex(m_pRoot.Get(), nIndex); if (!result) { // Fail if the tree does not contain |nIndex|. return false; } // Remove the name and the object from the leaf array |pFind|. RetainPtr pFind = result.value().container; pFind->RemoveAt(result.value().index + 1); pFind->RemoveAt(result.value().index); // Delete empty nodes and update the limits of |pFind|'s ancestors as needed. UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind.Get(), result.value().key, 0); return true; } RetainPtr CPDF_NameTree::LookupValueAndName( size_t nIndex, WideString* csName) const { std::optional result = SearchNameNodeByIndex(m_pRoot.Get(), nIndex); if (!result) { csName->clear(); return nullptr; } *csName = std::move(result.value().key); return result.value().value; } RetainPtr CPDF_NameTree::LookupValue( const WideString& csName) const { return SearchNameNodeByName(m_pRoot, csName, nullptr, nullptr); } RetainPtr CPDF_NameTree::LookupNewStyleNamedDest( const ByteString& sName) { return GetNamedDestFromObject( LookupValue(PDF_DecodeText(sName.unsigned_span()))); }