1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/cpdf_nametree.h"
8
9 #include <utility>
10 #include <vector>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_string.h"
16 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
17
18 namespace {
19
20 constexpr int kNameTreeMaxRecursion = 32;
21
GetNodeLimitsMaybeSwap(CPDF_Array * pLimits)22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
23 ASSERT(pLimits);
24 WideString csLeft = pLimits->GetUnicodeTextAt(0);
25 WideString csRight = pLimits->GetUnicodeTextAt(1);
26 // If the lower limit is greater than the upper limit, swap them.
27 if (csLeft.Compare(csRight) > 0) {
28 pLimits->SetNewAt<CPDF_String>(0, csRight);
29 pLimits->SetNewAt<CPDF_String>(1, csLeft);
30 csLeft = pLimits->GetUnicodeTextAt(0);
31 csRight = pLimits->GetUnicodeTextAt(1);
32 }
33 return {csLeft, csRight};
34 }
35
36 // Get the limit arrays that leaf array |pFind| is under in the tree with root
37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
38 // the root. Return true if successful.
GetNodeAncestorsLimits(const CPDF_Dictionary * pNode,const CPDF_Array * pFind,int nLevel,std::vector<CPDF_Array * > * pLimits)39 bool GetNodeAncestorsLimits(const CPDF_Dictionary* pNode,
40 const CPDF_Array* pFind,
41 int nLevel,
42 std::vector<CPDF_Array*>* pLimits) {
43 if (nLevel > kNameTreeMaxRecursion)
44 return false;
45
46 if (pNode->GetArrayFor("Names") == pFind) {
47 pLimits->push_back(pNode->GetArrayFor("Limits"));
48 return true;
49 }
50
51 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
52 if (!pKids)
53 return false;
54
55 for (size_t i = 0; i < pKids->GetCount(); ++i) {
56 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
57 if (!pKid)
58 continue;
59
60 if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
61 pLimits->push_back(pNode->GetArrayFor("Limits"));
62 return true;
63 }
64 }
65 return false;
66 }
67
68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
70 // if needed, and any ancestors that are now empty will be removed.
UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary * pNode,const CPDF_Array * pFind,const WideString & csName,int nLevel)71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
72 const CPDF_Array* pFind,
73 const WideString& csName,
74 int nLevel) {
75 if (nLevel > kNameTreeMaxRecursion)
76 return false;
77
78 CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
79 WideString csLeft;
80 WideString csRight;
81 if (pLimits)
82 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
83
84 CPDF_Array* pNames = pNode->GetArrayFor("Names");
85 if (pNames) {
86 if (pNames != pFind)
87 return false;
88 if (pNames->IsEmpty() || !pLimits)
89 return true;
90 if (csLeft != csName && csRight != csName)
91 return true;
92
93 // Since |csName| defines |pNode|'s limits, we need to loop through the
94 // names to find the new lower and upper limits.
95 WideString csNewLeft = csRight;
96 WideString csNewRight = csLeft;
97 for (size_t i = 0; i < pNames->GetCount() / 2; ++i) {
98 WideString wsName = pNames->GetUnicodeTextAt(i * 2);
99 if (wsName.Compare(csNewLeft) < 0)
100 csNewLeft = wsName;
101 if (wsName.Compare(csNewRight) > 0)
102 csNewRight = wsName;
103 }
104 pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
105 pLimits->SetNewAt<CPDF_String>(1, csNewRight);
106 return true;
107 }
108
109 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
110 if (!pKids)
111 return false;
112
113 // Loop through the kids to find the leaf array |pFind|.
114 for (size_t i = 0; i < pKids->GetCount(); ++i) {
115 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
116 if (!pKid)
117 continue;
118 if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
119 continue;
120
121 // Remove this child node if it's empty.
122 if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
123 (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
124 pKids->RemoveAt(i);
125 }
126 if (pKids->IsEmpty() || !pLimits)
127 return true;
128 if (csLeft != csName && csRight != csName)
129 return true;
130
131 // Since |csName| defines |pNode|'s limits, we need to loop through the
132 // kids to find the new lower and upper limits.
133 WideString csNewLeft = csRight;
134 WideString csNewRight = csLeft;
135 for (size_t j = 0; j < pKids->GetCount(); ++j) {
136 CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
137 ASSERT(pKidLimits);
138 if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
139 csNewLeft = pKidLimits->GetUnicodeTextAt(0);
140 if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
141 csNewRight = pKidLimits->GetUnicodeTextAt(1);
142 }
143 pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
144 pLimits->SetNewAt<CPDF_String>(1, csNewRight);
145 return true;
146 }
147 return false;
148 }
149
150 // Search for |csName| in the tree with root |pNode|. If successful, return the
151 // value that |csName| points to; |nIndex| will be the index of |csName|,
152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
154 // will be the leaf array that |csName| should be added to, and |pFindIndex|
155 // will be the index that it should be added at.
SearchNameNodeByName(const CPDF_Dictionary * pNode,const WideString & csName,int nLevel,size_t * nIndex,CPDF_Array ** ppFind,int * pFindIndex)156 CPDF_Object* SearchNameNodeByName(const CPDF_Dictionary* pNode,
157 const WideString& csName,
158 int nLevel,
159 size_t* nIndex,
160 CPDF_Array** ppFind,
161 int* pFindIndex) {
162 if (nLevel > kNameTreeMaxRecursion)
163 return nullptr;
164
165 CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
166 CPDF_Array* pNames = pNode->GetArrayFor("Names");
167 if (pLimits) {
168 WideString csLeft;
169 WideString csRight;
170 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
171 // Skip this node if the name to look for is smaller than its lower limit.
172 if (csName.Compare(csLeft) < 0)
173 return nullptr;
174
175 // Skip this node if the name to look for is greater than its higher limit,
176 // and the node itself is a leaf node.
177 if (csName.Compare(csRight) > 0 && pNames) {
178 if (ppFind)
179 *ppFind = pNames;
180 if (pFindIndex)
181 *pFindIndex = pNames->GetCount() / 2 - 1;
182
183 return nullptr;
184 }
185 }
186
187 // If the node is a leaf node, look for the name in its names array.
188 if (pNames) {
189 size_t dwCount = pNames->GetCount() / 2;
190 for (size_t i = 0; i < dwCount; i++) {
191 WideString csValue = pNames->GetUnicodeTextAt(i * 2);
192 int32_t iCompare = csValue.Compare(csName);
193 if (iCompare > 0)
194 break;
195 if (ppFind)
196 *ppFind = pNames;
197 if (pFindIndex)
198 *pFindIndex = i;
199 if (iCompare < 0)
200 continue;
201
202 *nIndex += i;
203 return pNames->GetDirectObjectAt(i * 2 + 1);
204 }
205 *nIndex += dwCount;
206 return nullptr;
207 }
208
209 // Search through the node's children.
210 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
211 if (!pKids)
212 return nullptr;
213
214 for (size_t i = 0; i < pKids->GetCount(); i++) {
215 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
216 if (!pKid)
217 continue;
218
219 CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
220 ppFind, pFindIndex);
221 if (pFound)
222 return pFound;
223 }
224 return nullptr;
225 }
226
227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If
228 // successful, return the value object; |csName| will be the key, |ppFind|
229 // will be the leaf array that this pair is in, and |pFindIndex| will be the
230 // index of the pair in |pFind|.
SearchNameNodeByIndex(const CPDF_Dictionary * pNode,size_t nIndex,int nLevel,size_t * nCurIndex,WideString * csName,CPDF_Array ** ppFind,int * pFindIndex)231 CPDF_Object* SearchNameNodeByIndex(const CPDF_Dictionary* pNode,
232 size_t nIndex,
233 int nLevel,
234 size_t* nCurIndex,
235 WideString* csName,
236 CPDF_Array** ppFind,
237 int* pFindIndex) {
238 if (nLevel > kNameTreeMaxRecursion)
239 return nullptr;
240
241 CPDF_Array* pNames = pNode->GetArrayFor("Names");
242 if (pNames) {
243 size_t nCount = pNames->GetCount() / 2;
244 if (nIndex >= *nCurIndex + nCount) {
245 *nCurIndex += nCount;
246 return nullptr;
247 }
248 if (ppFind)
249 *ppFind = pNames;
250 if (pFindIndex)
251 *pFindIndex = nIndex - *nCurIndex;
252
253 *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
254 return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
255 }
256
257 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
258 if (!pKids)
259 return nullptr;
260
261 for (size_t i = 0; i < pKids->GetCount(); i++) {
262 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
263 if (!pKid)
264 continue;
265 CPDF_Object* pFound = SearchNameNodeByIndex(
266 pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
267 if (pFound)
268 return pFound;
269 }
270 return nullptr;
271 }
272
273 // Get the total number of key-value pairs in the tree with root |pNode|.
CountNamesInternal(CPDF_Dictionary * pNode,int nLevel)274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
275 if (nLevel > kNameTreeMaxRecursion)
276 return 0;
277
278 CPDF_Array* pNames = pNode->GetArrayFor("Names");
279 if (pNames)
280 return pNames->GetCount() / 2;
281
282 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
283 if (!pKids)
284 return 0;
285
286 size_t nCount = 0;
287 for (size_t i = 0; i < pKids->GetCount(); i++) {
288 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
289 if (!pKid)
290 continue;
291
292 nCount += CountNamesInternal(pKid, nLevel + 1);
293 }
294 return nCount;
295 }
296
297 } // namespace
298
CPDF_NameTree(CPDF_Dictionary * pRoot)299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {}
300
CPDF_NameTree(const CPDF_Document * pDoc,const ByteString & category)301 CPDF_NameTree::CPDF_NameTree(const CPDF_Document* pDoc,
302 const ByteString& category)
303 : m_pRoot(nullptr) {
304 const CPDF_Dictionary* pRoot = pDoc->GetRoot();
305 if (!pRoot)
306 return;
307
308 CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
309 if (!pNames)
310 return;
311
312 m_pRoot = pNames->GetDictFor(category);
313 }
314
~CPDF_NameTree()315 CPDF_NameTree::~CPDF_NameTree() {}
316
GetCount() const317 size_t CPDF_NameTree::GetCount() const {
318 return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0;
319 }
320
GetIndex(const WideString & csName) const321 int CPDF_NameTree::GetIndex(const WideString& csName) const {
322 if (!m_pRoot)
323 return -1;
324
325 size_t nIndex = 0;
326 if (!SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
327 nullptr)) {
328 return -1;
329 }
330 return nIndex;
331 }
332
AddValueAndName(std::unique_ptr<CPDF_Object> pObj,const WideString & name)333 bool CPDF_NameTree::AddValueAndName(std::unique_ptr<CPDF_Object> pObj,
334 const WideString& name) {
335 if (!m_pRoot)
336 return false;
337
338 size_t nIndex = 0;
339 CPDF_Array* pFind = nullptr;
340 int nFindIndex = -1;
341 // Fail if the tree already contains this name or if the tree is too deep.
342 if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
343 &nFindIndex)) {
344 return false;
345 }
346
347 // If the returned |pFind| is a nullptr, then |name| is smaller than all
348 // existing entries in the tree, and we did not find a leaf array to place
349 // |name| into. We instead will find the leftmost leaf array in which to place
350 // |name| and |pObj|.
351 if (!pFind) {
352 size_t nCurIndex = 0;
353 WideString csName;
354 SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
355 nullptr);
356 }
357 ASSERT(pFind);
358
359 // Insert the name and the object into the leaf array found. Note that the
360 // insertion position is right after the key-value pair returned by |index|.
361 size_t nNameIndex = (nFindIndex + 1) * 2;
362 size_t nValueIndex = nNameIndex + 1;
363 pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
364 pFind->InsertAt(nValueIndex, std::move(pObj));
365
366 // Expand the limits that the newly added name is under, if the name falls
367 // outside of the limits of its leaf array or any arrays above it.
368 std::vector<CPDF_Array*> pLimits;
369 GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
370 for (auto* pLimit : pLimits) {
371 if (!pLimit)
372 continue;
373
374 if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
375 pLimit->SetNewAt<CPDF_String>(0, name);
376
377 if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
378 pLimit->SetNewAt<CPDF_String>(1, name);
379 }
380 return true;
381 }
382
DeleteValueAndName(int nIndex)383 bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
384 if (!m_pRoot)
385 return false;
386
387 size_t nCurIndex = 0;
388 WideString csName;
389 CPDF_Array* pFind = nullptr;
390 int nFindIndex = -1;
391 // Fail if the tree does not contain |nIndex|.
392 if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
393 &pFind, &nFindIndex)) {
394 return false;
395 }
396
397 // Remove the name and the object from the leaf array |pFind|.
398 pFind->RemoveAt(nFindIndex * 2);
399 pFind->RemoveAt(nFindIndex * 2);
400
401 // Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
402 UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
403 return true;
404 }
405
LookupValueAndName(int nIndex,WideString * csName) const406 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
407 WideString* csName) const {
408 csName->clear();
409 if (!m_pRoot)
410 return nullptr;
411
412 size_t nCurIndex = 0;
413 return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
414 nullptr, nullptr);
415 }
416
LookupValue(const WideString & csName) const417 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
418 if (!m_pRoot)
419 return nullptr;
420
421 size_t nIndex = 0;
422 return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
423 nullptr);
424 }
425
LookupNamedDest(CPDF_Document * pDoc,const WideString & sName)426 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
427 const WideString& sName) {
428 CPDF_Object* pValue = LookupValue(sName);
429 if (!pValue) {
430 CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
431 if (!pDests)
432 return nullptr;
433 pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
434 }
435 if (!pValue)
436 return nullptr;
437 if (CPDF_Array* pArray = pValue->AsArray())
438 return pArray;
439 if (CPDF_Dictionary* pDict = pValue->AsDictionary())
440 return pDict->GetArrayFor("D");
441 return nullptr;
442 }
443