1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/cpdf_nametree.h"
8
9 #include <utility>
10 #include <vector>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_string.h"
16 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
17
18 namespace {
19
20 constexpr int kNameTreeMaxRecursion = 32;
21
GetNodeLimitsMaybeSwap(CPDF_Array * pLimits)22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
23 ASSERT(pLimits);
24 WideString csLeft = pLimits->GetUnicodeTextAt(0);
25 WideString csRight = pLimits->GetUnicodeTextAt(1);
26 // If the lower limit is greater than the upper limit, swap them.
27 if (csLeft.Compare(csRight) > 0) {
28 pLimits->SetNewAt<CPDF_String>(0, csRight);
29 pLimits->SetNewAt<CPDF_String>(1, csLeft);
30 csLeft = pLimits->GetUnicodeTextAt(0);
31 csRight = pLimits->GetUnicodeTextAt(1);
32 }
33 return {csLeft, csRight};
34 }
35
36 // Get the limit arrays that leaf array |pFind| is under in the tree with root
37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
38 // the root. Return true if successful.
GetNodeAncestorsLimits(CPDF_Dictionary * pNode,const CPDF_Array * pFind,int nLevel,std::vector<CPDF_Array * > * pLimits)39 bool GetNodeAncestorsLimits(CPDF_Dictionary* pNode,
40 const CPDF_Array* pFind,
41 int nLevel,
42 std::vector<CPDF_Array*>* pLimits) {
43 if (nLevel > kNameTreeMaxRecursion)
44 return false;
45
46 if (pNode->GetArrayFor("Names") == pFind) {
47 pLimits->push_back(pNode->GetArrayFor("Limits"));
48 return true;
49 }
50
51 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
52 if (!pKids)
53 return false;
54
55 for (size_t i = 0; i < pKids->size(); ++i) {
56 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
57 if (!pKid)
58 continue;
59
60 if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
61 pLimits->push_back(pNode->GetArrayFor("Limits"));
62 return true;
63 }
64 }
65 return false;
66 }
67
68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
70 // if needed, and any ancestors that are now empty will be removed.
UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary * pNode,const CPDF_Array * pFind,const WideString & csName,int nLevel)71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
72 const CPDF_Array* pFind,
73 const WideString& csName,
74 int nLevel) {
75 if (nLevel > kNameTreeMaxRecursion)
76 return false;
77
78 CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
79 WideString csLeft;
80 WideString csRight;
81 if (pLimits)
82 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
83
84 CPDF_Array* pNames = pNode->GetArrayFor("Names");
85 if (pNames) {
86 if (pNames != pFind)
87 return false;
88 if (pNames->IsEmpty() || !pLimits)
89 return true;
90 if (csLeft != csName && csRight != csName)
91 return true;
92
93 // Since |csName| defines |pNode|'s limits, we need to loop through the
94 // names to find the new lower and upper limits.
95 WideString csNewLeft = csRight;
96 WideString csNewRight = csLeft;
97 for (size_t i = 0; i < pNames->size() / 2; ++i) {
98 WideString wsName = pNames->GetUnicodeTextAt(i * 2);
99 if (wsName.Compare(csNewLeft) < 0)
100 csNewLeft = wsName;
101 if (wsName.Compare(csNewRight) > 0)
102 csNewRight = wsName;
103 }
104 pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
105 pLimits->SetNewAt<CPDF_String>(1, csNewRight);
106 return true;
107 }
108
109 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
110 if (!pKids)
111 return false;
112
113 // Loop through the kids to find the leaf array |pFind|.
114 for (size_t i = 0; i < pKids->size(); ++i) {
115 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
116 if (!pKid)
117 continue;
118 if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
119 continue;
120
121 // Remove this child node if it's empty.
122 if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
123 (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
124 pKids->RemoveAt(i);
125 }
126 if (pKids->IsEmpty() || !pLimits)
127 return true;
128 if (csLeft != csName && csRight != csName)
129 return true;
130
131 // Since |csName| defines |pNode|'s limits, we need to loop through the
132 // kids to find the new lower and upper limits.
133 WideString csNewLeft = csRight;
134 WideString csNewRight = csLeft;
135 for (size_t j = 0; j < pKids->size(); ++j) {
136 CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
137 ASSERT(pKidLimits);
138 if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
139 csNewLeft = pKidLimits->GetUnicodeTextAt(0);
140 if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
141 csNewRight = pKidLimits->GetUnicodeTextAt(1);
142 }
143 pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
144 pLimits->SetNewAt<CPDF_String>(1, csNewRight);
145 return true;
146 }
147 return false;
148 }
149
150 // Search for |csName| in the tree with root |pNode|. If successful, return the
151 // value that |csName| points to; |nIndex| will be the index of |csName|,
152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
154 // will be the leaf array that |csName| should be added to, and |pFindIndex|
155 // will be the index that it should be added at.
SearchNameNodeByName(CPDF_Dictionary * pNode,const WideString & csName,int nLevel,size_t * nIndex,CPDF_Array ** ppFind,int * pFindIndex)156 CPDF_Object* SearchNameNodeByName(CPDF_Dictionary* pNode,
157 const WideString& csName,
158 int nLevel,
159 size_t* nIndex,
160 CPDF_Array** ppFind,
161 int* pFindIndex) {
162 if (nLevel > kNameTreeMaxRecursion)
163 return nullptr;
164
165 CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
166 CPDF_Array* pNames = pNode->GetArrayFor("Names");
167 if (pLimits) {
168 WideString csLeft;
169 WideString csRight;
170 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
171 // Skip this node if the name to look for is smaller than its lower limit.
172 if (csName.Compare(csLeft) < 0)
173 return nullptr;
174
175 // Skip this node if the name to look for is greater than its higher limit,
176 // and the node itself is a leaf node.
177 if (csName.Compare(csRight) > 0 && pNames) {
178 if (ppFind)
179 *ppFind = pNames;
180 if (pFindIndex)
181 *pFindIndex = pNames->size() / 2 - 1;
182
183 return nullptr;
184 }
185 }
186
187 // If the node is a leaf node, look for the name in its names array.
188 if (pNames) {
189 size_t dwCount = pNames->size() / 2;
190 for (size_t i = 0; i < dwCount; i++) {
191 WideString csValue = pNames->GetUnicodeTextAt(i * 2);
192 int32_t iCompare = csValue.Compare(csName);
193 if (iCompare > 0)
194 break;
195 if (ppFind)
196 *ppFind = pNames;
197 if (pFindIndex)
198 *pFindIndex = i;
199 if (iCompare < 0)
200 continue;
201
202 *nIndex += i;
203 return pNames->GetDirectObjectAt(i * 2 + 1);
204 }
205 *nIndex += dwCount;
206 return nullptr;
207 }
208
209 // Search through the node's children.
210 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
211 if (!pKids)
212 return nullptr;
213
214 for (size_t i = 0; i < pKids->size(); i++) {
215 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
216 if (!pKid)
217 continue;
218
219 CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
220 ppFind, pFindIndex);
221 if (pFound)
222 return pFound;
223 }
224 return nullptr;
225 }
226
227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If
228 // successful, return the value object; |csName| will be the key, |ppFind|
229 // will be the leaf array that this pair is in, and |pFindIndex| will be the
230 // index of the pair in |pFind|.
SearchNameNodeByIndex(CPDF_Dictionary * pNode,size_t nIndex,int nLevel,size_t * nCurIndex,WideString * csName,CPDF_Array ** ppFind,int * pFindIndex)231 CPDF_Object* SearchNameNodeByIndex(CPDF_Dictionary* pNode,
232 size_t nIndex,
233 int nLevel,
234 size_t* nCurIndex,
235 WideString* csName,
236 CPDF_Array** ppFind,
237 int* pFindIndex) {
238 if (nLevel > kNameTreeMaxRecursion)
239 return nullptr;
240
241 CPDF_Array* pNames = pNode->GetArrayFor("Names");
242 if (pNames) {
243 size_t nCount = pNames->size() / 2;
244 if (nIndex >= *nCurIndex + nCount) {
245 *nCurIndex += nCount;
246 return nullptr;
247 }
248 if (ppFind)
249 *ppFind = pNames;
250 if (pFindIndex)
251 *pFindIndex = nIndex - *nCurIndex;
252
253 *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
254 return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
255 }
256
257 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
258 if (!pKids)
259 return nullptr;
260
261 for (size_t i = 0; i < pKids->size(); i++) {
262 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
263 if (!pKid)
264 continue;
265 CPDF_Object* pFound = SearchNameNodeByIndex(
266 pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
267 if (pFound)
268 return pFound;
269 }
270 return nullptr;
271 }
272
273 // Get the total number of key-value pairs in the tree with root |pNode|.
CountNamesInternal(CPDF_Dictionary * pNode,int nLevel)274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
275 if (nLevel > kNameTreeMaxRecursion)
276 return 0;
277
278 CPDF_Array* pNames = pNode->GetArrayFor("Names");
279 if (pNames)
280 return pNames->size() / 2;
281
282 CPDF_Array* pKids = pNode->GetArrayFor("Kids");
283 if (!pKids)
284 return 0;
285
286 size_t nCount = 0;
287 for (size_t i = 0; i < pKids->size(); i++) {
288 CPDF_Dictionary* pKid = pKids->GetDictAt(i);
289 if (!pKid)
290 continue;
291
292 nCount += CountNamesInternal(pKid, nLevel + 1);
293 }
294 return nCount;
295 }
296
297 } // namespace
298
CPDF_NameTree(CPDF_Dictionary * pRoot)299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {}
300
CPDF_NameTree(CPDF_Document * pDoc,const ByteString & category)301 CPDF_NameTree::CPDF_NameTree(CPDF_Document* pDoc, const ByteString& category) {
302 CPDF_Dictionary* pRoot = pDoc->GetRoot();
303 if (!pRoot)
304 return;
305
306 CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
307 if (!pNames)
308 return;
309
310 m_pRoot.Reset(pNames->GetDictFor(category));
311 }
312
~CPDF_NameTree()313 CPDF_NameTree::~CPDF_NameTree() {}
314
GetCount() const315 size_t CPDF_NameTree::GetCount() const {
316 return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0;
317 }
318
AddValueAndName(RetainPtr<CPDF_Object> pObj,const WideString & name)319 bool CPDF_NameTree::AddValueAndName(RetainPtr<CPDF_Object> pObj,
320 const WideString& name) {
321 if (!m_pRoot)
322 return false;
323
324 size_t nIndex = 0;
325 CPDF_Array* pFind = nullptr;
326 int nFindIndex = -1;
327 // Fail if the tree already contains this name or if the tree is too deep.
328 if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
329 &nFindIndex)) {
330 return false;
331 }
332
333 // If the returned |pFind| is a nullptr, then |name| is smaller than all
334 // existing entries in the tree, and we did not find a leaf array to place
335 // |name| into. We instead will find the leftmost leaf array in which to place
336 // |name| and |pObj|.
337 if (!pFind) {
338 size_t nCurIndex = 0;
339 WideString csName;
340 SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
341 nullptr);
342 }
343 // Give up if that fails too.
344 if (!pFind)
345 return false;
346
347 // Insert the name and the object into the leaf array found. Note that the
348 // insertion position is right after the key-value pair returned by |index|.
349 size_t nNameIndex = (nFindIndex + 1) * 2;
350 size_t nValueIndex = nNameIndex + 1;
351 pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
352 pFind->InsertAt(nValueIndex, std::move(pObj));
353
354 // Expand the limits that the newly added name is under, if the name falls
355 // outside of the limits of its leaf array or any arrays above it.
356 std::vector<CPDF_Array*> pLimits;
357 GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
358 for (auto* pLimit : pLimits) {
359 if (!pLimit)
360 continue;
361
362 if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
363 pLimit->SetNewAt<CPDF_String>(0, name);
364
365 if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
366 pLimit->SetNewAt<CPDF_String>(1, name);
367 }
368 return true;
369 }
370
DeleteValueAndName(int nIndex)371 bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
372 if (!m_pRoot)
373 return false;
374
375 size_t nCurIndex = 0;
376 WideString csName;
377 CPDF_Array* pFind = nullptr;
378 int nFindIndex = -1;
379 // Fail if the tree does not contain |nIndex|.
380 if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
381 &pFind, &nFindIndex)) {
382 return false;
383 }
384
385 // Remove the name and the object from the leaf array |pFind|.
386 pFind->RemoveAt(nFindIndex * 2);
387 pFind->RemoveAt(nFindIndex * 2);
388
389 // Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
390 UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
391 return true;
392 }
393
LookupValueAndName(int nIndex,WideString * csName) const394 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
395 WideString* csName) const {
396 csName->clear();
397 if (!m_pRoot)
398 return nullptr;
399
400 size_t nCurIndex = 0;
401 return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
402 nullptr, nullptr);
403 }
404
LookupValue(const WideString & csName) const405 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
406 if (!m_pRoot)
407 return nullptr;
408
409 size_t nIndex = 0;
410 return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
411 nullptr);
412 }
413
LookupNamedDest(CPDF_Document * pDoc,const WideString & sName)414 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
415 const WideString& sName) {
416 CPDF_Object* pValue = LookupValue(sName);
417 if (!pValue) {
418 CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
419 if (!pDests)
420 return nullptr;
421 pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
422 }
423 if (!pValue)
424 return nullptr;
425 if (CPDF_Array* pArray = pValue->AsArray())
426 return pArray;
427 if (CPDF_Dictionary* pDict = pValue->AsDictionary())
428 return pDict->GetArrayFor("D");
429 return nullptr;
430 }
431