• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/pdf/SkPDFDocumentPriv.h"
9 #include "src/pdf/SkPDFTag.h"
10 
// The structure parent tree holds one entry per page, followed by one
// entry for every struct tree node that corresponds to an annotation.
// Each entry is a key/value pair: an integer key and an indirect
// reference value.
//
// Page entries use consecutive keys starting at 0. The total number of
// pages in the document is not yet known when annotation processing
// begins, so annotation keys start at a large fixed number instead.
// That number is therefore also the maximum number of pages in a PDF
// we can handle.
constexpr int kFirstAnnotationStructParentKey = 100000;
22 
23 struct SkPDFTagNode {
24     // Structure element nodes need a unique alphanumeric ID,
25     // and we need to be able to output them sorted in lexicographic
26     // order. This helper function takes one of our node IDs and
27     // builds an ID string that zero-pads the digits so that lexicographic
28     // order matches numeric order.
nodeIdToStringSkPDFTagNode29     static SkString nodeIdToString(int nodeId) {
30         SkString idString;
31         idString.printf("node%08d", nodeId);
32         return idString;
33     }
34 
35     SkPDFTagNode* fChildren = nullptr;
36     size_t fChildCount = 0;
37     struct MarkedContentInfo {
38         unsigned fPageIndex;
39         int fMarkId;
40     };
41     SkTArray<MarkedContentInfo> fMarkedContent;
42     int fNodeId;
43     SkString fTypeString;
44     SkString fAlt;
45     SkString fLang;
46     SkPDFIndirectReference fRef;
47     enum State {
48         kUnknown,
49         kYes,
50         kNo,
51     } fCanDiscard = kUnknown;
52     std::unique_ptr<SkPDFArray> fAttributes;
53     struct AnnotationInfo {
54         unsigned fPageIndex;
55         SkPDFIndirectReference fAnnotationRef;
56     };
57     std::vector<AnnotationInfo> fAnnotations;
58 };
59 
60 SkPDF::AttributeList::AttributeList() = default;
61 
62 SkPDF::AttributeList::~AttributeList() = default;
63 
appendInt(const char * owner,const char * name,int value)64 void SkPDF::AttributeList::appendInt(
65         const char* owner, const char* name, int value) {
66     if (!fAttrs)
67         fAttrs = SkPDFMakeArray();
68     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
69     attrDict->insertName("O", owner);
70     attrDict->insertInt(name, value);
71     fAttrs->appendObject(std::move(attrDict));
72 }
73 
appendFloat(const char * owner,const char * name,float value)74 void SkPDF::AttributeList::appendFloat(
75         const char* owner, const char* name, float value) {
76     if (!fAttrs)
77         fAttrs = SkPDFMakeArray();
78     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
79     attrDict->insertName("O", owner);
80     attrDict->insertScalar(name, value);
81     fAttrs->appendObject(std::move(attrDict));
82 }
83 
appendName(const char * owner,const char * name,const char * value)84 void SkPDF::AttributeList::appendName(
85         const char* owner, const char* name, const char* value) {
86     if (!fAttrs)
87         fAttrs = SkPDFMakeArray();
88     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
89     attrDict->insertName("O", owner);
90     attrDict->insertName(name, value);
91     fAttrs->appendObject(std::move(attrDict));
92 }
93 
appendString(const char * owner,const char * name,const char * value)94 void SkPDF::AttributeList::appendString(
95         const char* owner, const char* name, const char* value) {
96     if (!fAttrs)
97         fAttrs = SkPDFMakeArray();
98     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
99     attrDict->insertName("O", owner);
100     attrDict->insertString(name, value);
101     fAttrs->appendObject(std::move(attrDict));
102 }
103 
appendFloatArray(const char * owner,const char * name,const std::vector<float> & value)104 void SkPDF::AttributeList::appendFloatArray(
105         const char* owner, const char* name, const std::vector<float>& value) {
106     if (!fAttrs)
107         fAttrs = SkPDFMakeArray();
108     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
109     attrDict->insertName("O", owner);
110     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
111     for (float element : value) {
112         pdfArray->appendScalar(element);
113     }
114     attrDict->insertObject(name, std::move(pdfArray));
115     fAttrs->appendObject(std::move(attrDict));
116 }
117 
118 // Deprecated.
appendStringArray(const char * owner,const char * name,const std::vector<SkString> & values)119 void SkPDF::AttributeList::appendStringArray(
120          const char* owner,
121          const char* name,
122          const std::vector<SkString>& values) {
123     if (!fAttrs)
124         fAttrs = SkPDFMakeArray();
125     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
126     attrDict->insertName("O", owner);
127     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
128     for (const SkString& element : values) {
129         pdfArray->appendString(element);
130     }
131     attrDict->insertObject(name, std::move(pdfArray));
132     fAttrs->appendObject(std::move(attrDict));
133 }
134 
135 
appendNodeIdArray(const char * owner,const char * name,const std::vector<int> & nodeIds)136 void SkPDF::AttributeList::appendNodeIdArray(
137         const char* owner,
138         const char* name,
139         const std::vector<int>& nodeIds) {
140     if (!fAttrs)
141         fAttrs = SkPDFMakeArray();
142     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
143     attrDict->insertName("O", owner);
144     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
145     for (int nodeId : nodeIds) {
146         SkString idString = SkPDFTagNode::nodeIdToString(nodeId);
147         pdfArray->appendString(idString);
148     }
149     attrDict->insertObject(name, std::move(pdfArray));
150     fAttrs->appendObject(std::move(attrDict));
151 }
152 
SkPDFTagTree()153 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
154 
155 SkPDFTagTree::~SkPDFTagTree() = default;
156 
157 // static
Copy(SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)158 void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
159                         SkPDFTagNode* dst,
160                         SkArenaAlloc* arena,
161                         SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
162     nodeMap->set(node.fNodeId, dst);
163     for (int nodeId : node.fAdditionalNodeIds) {
164         SkASSERT(!nodeMap->find(nodeId));
165         nodeMap->set(nodeId, dst);
166     }
167     dst->fNodeId = node.fNodeId;
168     dst->fTypeString = node.fTypeString;
169     dst->fAlt = node.fAlt;
170     dst->fLang = node.fLang;
171 
172     size_t childCount = node.fChildVector.size();
173     SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
174     dst->fChildCount = childCount;
175     dst->fChildren = children;
176     for (size_t i = 0; i < childCount; ++i) {
177         Copy(*node.fChildVector[i], &children[i], arena, nodeMap);
178     }
179 
180     dst->fAttributes = std::move(node.fAttributes.fAttrs);
181 }
182 
init(SkPDF::StructureElementNode * node)183 void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
184     if (node) {
185         fRoot = fArena.make<SkPDFTagNode>();
186         Copy(*node, fRoot, &fArena, &fNodeMap);
187     }
188 }
189 
createMarkIdForNodeId(int nodeId,unsigned pageIndex)190 int SkPDFTagTree::createMarkIdForNodeId(int nodeId, unsigned pageIndex) {
191     if (!fRoot) {
192         return -1;
193     }
194     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
195     if (!tagPtr) {
196         return -1;
197     }
198     SkPDFTagNode* tag = *tagPtr;
199     SkASSERT(tag);
200     while (fMarksPerPage.size() < pageIndex + 1) {
201         fMarksPerPage.push_back();
202     }
203     SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
204     int markId = pageMarks.count();
205     tag->fMarkedContent.push_back({pageIndex, markId});
206     pageMarks.push_back(tag);
207     return markId;
208 }
209 
createStructParentKeyForNodeId(int nodeId,unsigned pageIndex)210 int SkPDFTagTree::createStructParentKeyForNodeId(int nodeId, unsigned pageIndex) {
211     if (!fRoot) {
212         return -1;
213     }
214     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
215     if (!tagPtr) {
216         return -1;
217     }
218     SkPDFTagNode* tag = *tagPtr;
219     SkASSERT(tag);
220 
221     tag->fCanDiscard = SkPDFTagNode::kNo;
222 
223     int nextStructParentKey = kFirstAnnotationStructParentKey +
224         static_cast<int>(fParentTreeAnnotationNodeIds.size());
225     fParentTreeAnnotationNodeIds.push_back(nodeId);
226     return nextStructParentKey;
227 }
228 
can_discard(SkPDFTagNode * node)229 static bool can_discard(SkPDFTagNode* node) {
230     if (node->fCanDiscard == SkPDFTagNode::kYes) {
231         return true;
232     }
233     if (node->fCanDiscard == SkPDFTagNode::kNo) {
234         return false;
235     }
236     if (!node->fMarkedContent.empty()) {
237         node->fCanDiscard = SkPDFTagNode::kNo;
238         return false;
239     }
240     for (size_t i = 0; i < node->fChildCount; ++i) {
241         if (!can_discard(&node->fChildren[i])) {
242             node->fCanDiscard = SkPDFTagNode::kNo;
243             return false;
244         }
245     }
246     node->fCanDiscard = SkPDFTagNode::kYes;
247     return true;
248 }
249 
PrepareTagTreeToEmit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)250 SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference parent,
251                                                           SkPDFTagNode* node,
252                                                           SkPDFDocument* doc) {
253     SkPDFIndirectReference ref = doc->reserveRef();
254     std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
255     SkPDFTagNode* children = node->fChildren;
256     size_t childCount = node->fChildCount;
257     for (size_t i = 0; i < childCount; ++i) {
258         SkPDFTagNode* child = &children[i];
259         if (!(can_discard(child))) {
260             kids->appendRef(PrepareTagTreeToEmit(ref, child, doc));
261         }
262     }
263     for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
264         std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
265         mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
266         mcr->insertInt("MCID", info.fMarkId);
267         kids->appendObject(std::move(mcr));
268     }
269     for (const SkPDFTagNode::AnnotationInfo& annotationInfo : node->fAnnotations) {
270         std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR");
271         annotationDict->insertRef("Obj", annotationInfo.fAnnotationRef);
272         annotationDict->insertRef("Pg", doc->getPage(annotationInfo.fPageIndex));
273         kids->appendObject(std::move(annotationDict));
274     }
275     node->fRef = ref;
276     SkPDFDict dict("StructElem");
277     dict.insertName("S", node->fTypeString.isEmpty() ? "NonStruct" : node->fTypeString.c_str());
278     if (!node->fAlt.isEmpty()) {
279         dict.insertString("Alt", node->fAlt);
280     }
281     if (!node->fLang.isEmpty()) {
282         dict.insertString("Lang", node->fLang);
283     }
284     dict.insertRef("P", parent);
285     dict.insertObject("K", std::move(kids));
286     if (node->fAttributes) {
287         dict.insertObject("A", std::move(node->fAttributes));
288     }
289 
290     // Each node has a unique ID that also needs to be referenced
291     // in a separate IDTree node, along with the lowest and highest
292     // unique ID string.
293     SkString idString = SkPDFTagNode::nodeIdToString(node->fNodeId);
294     dict.insertString("ID", idString.c_str());
295     IDTreeEntry idTreeEntry = {node->fNodeId, ref};
296     fIdTreeEntries.push_back(idTreeEntry);
297 
298     return doc->emit(dict, ref);
299 }
300 
addNodeAnnotation(int nodeId,SkPDFIndirectReference annotationRef,unsigned pageIndex)301 void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef, unsigned pageIndex) {
302     if (!fRoot) {
303         return;
304     }
305     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
306     if (!tagPtr) {
307         return;
308     }
309     SkPDFTagNode* tag = *tagPtr;
310     SkASSERT(tag);
311 
312     SkPDFTagNode::AnnotationInfo annotationInfo = {pageIndex, annotationRef};
313     tag->fAnnotations.push_back(annotationInfo);
314 }
315 
makeStructTreeRoot(SkPDFDocument * doc)316 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
317     if (!fRoot || can_discard(fRoot)) {
318         return SkPDFIndirectReference();
319     }
320 
321     SkPDFIndirectReference ref = doc->reserveRef();
322 
323     unsigned pageCount = SkToUInt(doc->pageCount());
324 
325     // Build the StructTreeRoot.
326     SkPDFDict structTreeRoot("StructTreeRoot");
327     structTreeRoot.insertRef("K", PrepareTagTreeToEmit(ref, fRoot, doc));
328     structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
329 
330     // Build the parent tree, which consists of two things:
331     // (1) For each page, a mapping from the marked content IDs on
332     // each page to their corresponding tags
333     // (2) For each annotation, an indirect reference to that
334     // annotation's struct tree element.
335     SkPDFDict parentTree("ParentTree");
336     auto parentTreeNums = SkPDFMakeArray();
337 
338     // First, one entry per page.
339     SkASSERT(fMarksPerPage.size() <= pageCount);
340     for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
341         const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
342         SkPDFArray markToTagArray;
343         for (SkPDFTagNode* mark : pageMarks) {
344             SkASSERT(mark->fRef);
345             markToTagArray.appendRef(mark->fRef);
346         }
347         parentTreeNums->appendInt(j);
348         parentTreeNums->appendRef(doc->emit(markToTagArray));
349     }
350 
351     // Then, one entry per annotation.
352     for (size_t j = 0; j < fParentTreeAnnotationNodeIds.size(); ++j) {
353         int nodeId = fParentTreeAnnotationNodeIds[j];
354         int structParentKey = kFirstAnnotationStructParentKey + static_cast<int>(j);
355 
356         SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
357         if (!tagPtr) {
358             continue;
359         }
360         SkPDFTagNode* tag = *tagPtr;
361         parentTreeNums->appendInt(structParentKey);
362         parentTreeNums->appendRef(tag->fRef);
363     }
364 
365     parentTree.insertObject("Nums", std::move(parentTreeNums));
366     structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
367 
368     // Build the IDTree, a mapping from every unique ID string to
369     // a reference to its corresponding structure element node.
370     if (!fIdTreeEntries.empty()) {
371         std::sort(fIdTreeEntries.begin(), fIdTreeEntries.end(),
372                   [](const IDTreeEntry& a, const IDTreeEntry& b) {
373                     return a.nodeId < b.nodeId;
374                   });
375 
376         SkPDFDict idTree;
377         SkPDFDict idTreeLeaf;
378         auto limits = SkPDFMakeArray();
379         SkString lowestNodeIdString = SkPDFTagNode::nodeIdToString(
380             fIdTreeEntries.begin()->nodeId);
381         limits->appendString(lowestNodeIdString);
382         SkString highestNodeIdString = SkPDFTagNode::nodeIdToString(
383             fIdTreeEntries.rbegin()->nodeId);
384         limits->appendString(highestNodeIdString);
385         idTreeLeaf.insertObject("Limits", std::move(limits));
386         auto names = SkPDFMakeArray();
387         for (const IDTreeEntry& entry : fIdTreeEntries) {
388           SkString idString = SkPDFTagNode::nodeIdToString(entry.nodeId);
389             names->appendString(idString);
390             names->appendRef(entry.ref);
391         }
392         idTreeLeaf.insertObject("Names", std::move(names));
393         auto idTreeKids = SkPDFMakeArray();
394         idTreeKids->appendRef(doc->emit(idTreeLeaf));
395         idTree.insertObject("Kids", std::move(idTreeKids));
396         structTreeRoot.insertRef("IDTree", doc->emit(idTree));
397     }
398 
399     return doc->emit(structTreeRoot, ref);
400 }
401