• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/pdf/SkPDFDocumentPriv.h"
9 #include "src/pdf/SkPDFTag.h"
10 
// The struct parent tree consists of one entry per page, followed by
// entries for individual struct tree nodes corresponding to
// annotations.  Each entry is a key/value pair with an integer key
// and an indirect reference value.
//
// The page entries get consecutive keys starting at 0. Since we don't
// know the total number of pages in the document at the time we start
// processing annotations, start the key for annotations with a large
// number, which effectively becomes the maximum number of pages in a
// PDF we can handle.
const int kFirstAnnotationStructParentKey = 100000;
22 
23 struct SkPDFTagNode {
24     // Structure element nodes need a unique alphanumeric ID,
25     // and we need to be able to output them sorted in lexicographic
26     // order. This helper function takes one of our node IDs and
27     // builds an ID string that zero-pads the digits so that lexicographic
28     // order matches numeric order.
nodeIdToStringSkPDFTagNode29     static SkString nodeIdToString(int nodeId) {
30         SkString idString;
31         idString.printf("node%08d", nodeId);
32         return idString;
33     }
34 
35     SkPDFTagNode* fChildren = nullptr;
36     size_t fChildCount = 0;
37     struct MarkedContentInfo {
38         unsigned fPageIndex;
39         int fMarkId;
40     };
41     SkTArray<MarkedContentInfo> fMarkedContent;
42     int fNodeId;
43     SkString fTypeString;
44     SkString fAlt;
45     SkString fLang;
46     SkPDFIndirectReference fRef;
47     enum State {
48         kUnknown,
49         kYes,
50         kNo,
51     } fCanDiscard = kUnknown;
52     std::unique_ptr<SkPDFArray> fAttributes;
53     struct AnnotationInfo {
54         unsigned fPageIndex;
55         SkPDFIndirectReference fAnnotationRef;
56     };
57     std::vector<AnnotationInfo> fAnnotations;
58 };
59 
// Out-of-line defaulted constructor; a new list starts with no attribute
// array (the append* methods create fAttrs on first use).
SkPDF::AttributeList::AttributeList() = default;

// Out-of-line defaulted destructor.
SkPDF::AttributeList::~AttributeList() = default;
63 
appendInt(const char * owner,const char * name,int value)64 void SkPDF::AttributeList::appendInt(
65         const char* owner, const char* name, int value) {
66     if (!fAttrs)
67         fAttrs = SkPDFMakeArray();
68     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
69     attrDict->insertName("O", owner);
70     attrDict->insertInt(name, value);
71     fAttrs->appendObject(std::move(attrDict));
72 }
73 
appendFloat(const char * owner,const char * name,float value)74 void SkPDF::AttributeList::appendFloat(
75         const char* owner, const char* name, float value) {
76     if (!fAttrs)
77         fAttrs = SkPDFMakeArray();
78     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
79     attrDict->insertName("O", owner);
80     attrDict->insertScalar(name, value);
81     fAttrs->appendObject(std::move(attrDict));
82 }
83 
appendName(const char * owner,const char * name,const char * value)84 void SkPDF::AttributeList::appendName(
85         const char* owner, const char* name, const char* value) {
86     if (!fAttrs)
87         fAttrs = SkPDFMakeArray();
88     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
89     attrDict->insertName("O", owner);
90     attrDict->insertName(name, value);
91     fAttrs->appendObject(std::move(attrDict));
92 }
93 
appendFloatArray(const char * owner,const char * name,const std::vector<float> & value)94 void SkPDF::AttributeList::appendFloatArray(
95         const char* owner, const char* name, const std::vector<float>& value) {
96     if (!fAttrs)
97         fAttrs = SkPDFMakeArray();
98     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
99     attrDict->insertName("O", owner);
100     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
101     for (float element : value) {
102         pdfArray->appendScalar(element);
103     }
104     attrDict->insertObject(name, std::move(pdfArray));
105     fAttrs->appendObject(std::move(attrDict));
106 }
107 
appendNodeIdArray(const char * owner,const char * name,const std::vector<int> & nodeIds)108 void SkPDF::AttributeList::appendNodeIdArray(
109         const char* owner,
110         const char* name,
111         const std::vector<int>& nodeIds) {
112     if (!fAttrs)
113         fAttrs = SkPDFMakeArray();
114     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
115     attrDict->insertName("O", owner);
116     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
117     for (int nodeId : nodeIds) {
118         SkString idString = SkPDFTagNode::nodeIdToString(nodeId);
119         pdfArray->appendByteString(idString);
120     }
121     attrDict->insertObject(name, std::move(pdfArray));
122     fAttrs->appendObject(std::move(attrDict));
123 }
124 
// Constructs an empty tag tree. The arena is constructed with
// 4 * sizeof(SkPDFTagNode) -- presumably its initial allocation size, i.e.
// room for four nodes; confirm against SkArenaAlloc.
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}

// Out-of-line defaulted destructor.
SkPDFTagTree::~SkPDFTagTree() = default;
128 
129 // static
Copy(SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)130 void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
131                         SkPDFTagNode* dst,
132                         SkArenaAlloc* arena,
133                         SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
134     nodeMap->set(node.fNodeId, dst);
135     for (int nodeId : node.fAdditionalNodeIds) {
136         SkASSERT(!nodeMap->find(nodeId));
137         nodeMap->set(nodeId, dst);
138     }
139     dst->fNodeId = node.fNodeId;
140     dst->fTypeString = node.fTypeString;
141     dst->fAlt = node.fAlt;
142     dst->fLang = node.fLang;
143 
144     size_t childCount = node.fChildVector.size();
145     SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
146     dst->fChildCount = childCount;
147     dst->fChildren = children;
148     for (size_t i = 0; i < childCount; ++i) {
149         Copy(*node.fChildVector[i], &children[i], arena, nodeMap);
150     }
151 
152     dst->fAttributes = std::move(node.fAttributes.fAttrs);
153 }
154 
init(SkPDF::StructureElementNode * node)155 void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
156     if (node) {
157         fRoot = fArena.make<SkPDFTagNode>();
158         Copy(*node, fRoot, &fArena, &fNodeMap);
159     }
160 }
161 
createMarkIdForNodeId(int nodeId,unsigned pageIndex)162 int SkPDFTagTree::createMarkIdForNodeId(int nodeId, unsigned pageIndex) {
163     if (!fRoot) {
164         return -1;
165     }
166     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
167     if (!tagPtr) {
168         return -1;
169     }
170     SkPDFTagNode* tag = *tagPtr;
171     SkASSERT(tag);
172     while (SkToUInt(fMarksPerPage.size()) < pageIndex + 1) {
173         fMarksPerPage.push_back();
174     }
175     SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
176     int markId = pageMarks.size();
177     tag->fMarkedContent.push_back({pageIndex, markId});
178     pageMarks.push_back(tag);
179     return markId;
180 }
181 
createStructParentKeyForNodeId(int nodeId,unsigned pageIndex)182 int SkPDFTagTree::createStructParentKeyForNodeId(int nodeId, unsigned pageIndex) {
183     if (!fRoot) {
184         return -1;
185     }
186     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
187     if (!tagPtr) {
188         return -1;
189     }
190     SkPDFTagNode* tag = *tagPtr;
191     SkASSERT(tag);
192 
193     tag->fCanDiscard = SkPDFTagNode::kNo;
194 
195     int nextStructParentKey = kFirstAnnotationStructParentKey +
196         static_cast<int>(fParentTreeAnnotationNodeIds.size());
197     fParentTreeAnnotationNodeIds.push_back(nodeId);
198     return nextStructParentKey;
199 }
200 
can_discard(SkPDFTagNode * node)201 static bool can_discard(SkPDFTagNode* node) {
202     if (node->fCanDiscard == SkPDFTagNode::kYes) {
203         return true;
204     }
205     if (node->fCanDiscard == SkPDFTagNode::kNo) {
206         return false;
207     }
208     if (!node->fMarkedContent.empty()) {
209         node->fCanDiscard = SkPDFTagNode::kNo;
210         return false;
211     }
212     for (size_t i = 0; i < node->fChildCount; ++i) {
213         if (!can_discard(&node->fChildren[i])) {
214             node->fCanDiscard = SkPDFTagNode::kNo;
215             return false;
216         }
217     }
218     node->fCanDiscard = SkPDFTagNode::kYes;
219     return true;
220 }
221 
PrepareTagTreeToEmit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)222 SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference parent,
223                                                           SkPDFTagNode* node,
224                                                           SkPDFDocument* doc) {
225     SkPDFIndirectReference ref = doc->reserveRef();
226     std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
227     SkPDFTagNode* children = node->fChildren;
228     size_t childCount = node->fChildCount;
229     for (size_t i = 0; i < childCount; ++i) {
230         SkPDFTagNode* child = &children[i];
231         if (!(can_discard(child))) {
232             kids->appendRef(PrepareTagTreeToEmit(ref, child, doc));
233         }
234     }
235     for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
236         std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
237         mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
238         mcr->insertInt("MCID", info.fMarkId);
239         kids->appendObject(std::move(mcr));
240     }
241     for (const SkPDFTagNode::AnnotationInfo& annotationInfo : node->fAnnotations) {
242         std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR");
243         annotationDict->insertRef("Obj", annotationInfo.fAnnotationRef);
244         annotationDict->insertRef("Pg", doc->getPage(annotationInfo.fPageIndex));
245         kids->appendObject(std::move(annotationDict));
246     }
247     node->fRef = ref;
248     SkPDFDict dict("StructElem");
249     dict.insertName("S", node->fTypeString.isEmpty() ? "NonStruct" : node->fTypeString.c_str());
250     if (!node->fAlt.isEmpty()) {
251         dict.insertTextString("Alt", node->fAlt);
252     }
253     if (!node->fLang.isEmpty()) {
254         dict.insertTextString("Lang", node->fLang);
255     }
256     dict.insertRef("P", parent);
257     dict.insertObject("K", std::move(kids));
258     if (node->fAttributes) {
259         dict.insertObject("A", std::move(node->fAttributes));
260     }
261 
262     // Each node has a unique ID that also needs to be referenced
263     // in a separate IDTree node, along with the lowest and highest
264     // unique ID string.
265     SkString idString = SkPDFTagNode::nodeIdToString(node->fNodeId);
266     dict.insertByteString("ID", idString.c_str());
267     IDTreeEntry idTreeEntry = {node->fNodeId, ref};
268     fIdTreeEntries.push_back(idTreeEntry);
269 
270     return doc->emit(dict, ref);
271 }
272 
addNodeAnnotation(int nodeId,SkPDFIndirectReference annotationRef,unsigned pageIndex)273 void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef, unsigned pageIndex) {
274     if (!fRoot) {
275         return;
276     }
277     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
278     if (!tagPtr) {
279         return;
280     }
281     SkPDFTagNode* tag = *tagPtr;
282     SkASSERT(tag);
283 
284     SkPDFTagNode::AnnotationInfo annotationInfo = {pageIndex, annotationRef};
285     tag->fAnnotations.push_back(annotationInfo);
286 }
287 
makeStructTreeRoot(SkPDFDocument * doc)288 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
289     if (!fRoot || can_discard(fRoot)) {
290         return SkPDFIndirectReference();
291     }
292 
293     SkPDFIndirectReference ref = doc->reserveRef();
294 
295     unsigned pageCount = SkToUInt(doc->pageCount());
296 
297     // Build the StructTreeRoot.
298     SkPDFDict structTreeRoot("StructTreeRoot");
299     structTreeRoot.insertRef("K", PrepareTagTreeToEmit(ref, fRoot, doc));
300     structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
301 
302     // Build the parent tree, which consists of two things:
303     // (1) For each page, a mapping from the marked content IDs on
304     // each page to their corresponding tags
305     // (2) For each annotation, an indirect reference to that
306     // annotation's struct tree element.
307     SkPDFDict parentTree("ParentTree");
308     auto parentTreeNums = SkPDFMakeArray();
309 
310     // First, one entry per page.
311     SkASSERT(SkToUInt(fMarksPerPage.size()) <= pageCount);
312     for (int j = 0; j < fMarksPerPage.size(); ++j) {
313         const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
314         SkPDFArray markToTagArray;
315         for (SkPDFTagNode* mark : pageMarks) {
316             SkASSERT(mark->fRef);
317             markToTagArray.appendRef(mark->fRef);
318         }
319         parentTreeNums->appendInt(j);
320         parentTreeNums->appendRef(doc->emit(markToTagArray));
321     }
322 
323     // Then, one entry per annotation.
324     for (size_t j = 0; j < fParentTreeAnnotationNodeIds.size(); ++j) {
325         int nodeId = fParentTreeAnnotationNodeIds[j];
326         int structParentKey = kFirstAnnotationStructParentKey + static_cast<int>(j);
327 
328         SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
329         if (!tagPtr) {
330             continue;
331         }
332         SkPDFTagNode* tag = *tagPtr;
333         parentTreeNums->appendInt(structParentKey);
334         parentTreeNums->appendRef(tag->fRef);
335     }
336 
337     parentTree.insertObject("Nums", std::move(parentTreeNums));
338     structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
339 
340     // Build the IDTree, a mapping from every unique ID string to
341     // a reference to its corresponding structure element node.
342     if (!fIdTreeEntries.empty()) {
343         std::sort(fIdTreeEntries.begin(), fIdTreeEntries.end(),
344                   [](const IDTreeEntry& a, const IDTreeEntry& b) {
345                     return a.nodeId < b.nodeId;
346                   });
347 
348         SkPDFDict idTree;
349         SkPDFDict idTreeLeaf;
350         auto limits = SkPDFMakeArray();
351         SkString lowestNodeIdString = SkPDFTagNode::nodeIdToString(
352             fIdTreeEntries.begin()->nodeId);
353         limits->appendByteString(lowestNodeIdString);
354         SkString highestNodeIdString = SkPDFTagNode::nodeIdToString(
355             fIdTreeEntries.rbegin()->nodeId);
356         limits->appendByteString(highestNodeIdString);
357         idTreeLeaf.insertObject("Limits", std::move(limits));
358         auto names = SkPDFMakeArray();
359         for (const IDTreeEntry& entry : fIdTreeEntries) {
360           SkString idString = SkPDFTagNode::nodeIdToString(entry.nodeId);
361             names->appendByteString(idString);
362             names->appendRef(entry.ref);
363         }
364         idTreeLeaf.insertObject("Names", std::move(names));
365         auto idTreeKids = SkPDFMakeArray();
366         idTreeKids->appendRef(doc->emit(idTreeLeaf));
367         idTree.insertObject("Kids", std::move(idTreeKids));
368         structTreeRoot.insertRef("IDTree", doc->emit(idTree));
369     }
370 
371     return doc->emit(structTreeRoot, ref);
372 }
373