/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7 
8 #include "src/pdf/SkPDFDocumentPriv.h"
9 #include "src/pdf/SkPDFTag.h"
10 
11 // Table 333 in PDF 32000-1:2008
tag_name_from_type(SkPDF::DocumentStructureType type)12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
13     switch (type) {
14         #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
15         M(Document);
16         M(Part);
17         M(Art);
18         M(Sect);
19         M(Div);
20         M(BlockQuote);
21         M(Caption);
22         M(TOC);
23         M(TOCI);
24         M(Index);
25         M(NonStruct);
26         M(Private);
27         M(H);
28         M(H1);
29         M(H2);
30         M(H3);
31         M(H4);
32         M(H5);
33         M(H6);
34         M(P);
35         M(L);
36         M(LI);
37         M(Lbl);
38         M(LBody);
39         M(Table);
40         M(TR);
41         M(TH);
42         M(TD);
43         M(THead);
44         M(TBody);
45         M(TFoot);
46         M(Span);
47         M(Quote);
48         M(Note);
49         M(Reference);
50         M(BibEntry);
51         M(Code);
52         M(Link);
53         M(Annot);
54         M(Ruby);
55         M(RB);
56         M(RT);
57         M(RP);
58         M(Warichu);
59         M(WT);
60         M(WP);
61         M(Figure);
62         M(Formula);
63         M(Form);
64         #undef M
65     }
66     SK_ABORT("bad tag");
67 }
68 
69 SkPDF::AttributeList::AttributeList() = default;
70 
71 SkPDF::AttributeList::~AttributeList() = default;
72 
appendInt(const char * owner,const char * name,int value)73 void SkPDF::AttributeList::appendInt(
74         const char* owner, const char* name, int value) {
75     if (!fAttrs)
76         fAttrs = SkPDFMakeArray();
77     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
78     attrDict->insertName("O", owner);
79     attrDict->insertInt(name, value);
80     fAttrs->appendObject(std::move(attrDict));
81 }
82 
appendFloat(const char * owner,const char * name,float value)83 void SkPDF::AttributeList::appendFloat(
84         const char* owner, const char* name, float value) {
85     if (!fAttrs)
86         fAttrs = SkPDFMakeArray();
87     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
88     attrDict->insertName("O", owner);
89     attrDict->insertScalar(name, value);
90     fAttrs->appendObject(std::move(attrDict));
91 }
92 
appendString(const char * owner,const char * name,const char * value)93 void SkPDF::AttributeList::appendString(
94         const char* owner, const char* name, const char* value) {
95     if (!fAttrs)
96         fAttrs = SkPDFMakeArray();
97     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
98     attrDict->insertName("O", owner);
99     attrDict->insertName(name, value);
100     fAttrs->appendObject(std::move(attrDict));
101 }
102 
appendFloatArray(const char * owner,const char * name,const std::vector<float> & value)103 void SkPDF::AttributeList::appendFloatArray(
104         const char* owner, const char* name, const std::vector<float>& value) {
105     if (!fAttrs)
106         fAttrs = SkPDFMakeArray();
107     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
108     attrDict->insertName("O", owner);
109     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
110     for (float element : value) {
111         pdfArray->appendScalar(element);
112     }
113     attrDict->insertObject(name, std::move(pdfArray));
114     fAttrs->appendObject(std::move(attrDict));
115 }
116 
appendStringArray(const char * owner,const char * name,const std::vector<SkString> & value)117 void SkPDF::AttributeList::appendStringArray(
118         const char* owner,
119         const char* name,
120         const std::vector<SkString>& value) {
121     if (!fAttrs)
122         fAttrs = SkPDFMakeArray();
123     std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
124     attrDict->insertName("O", owner);
125     std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
126     for (SkString element : value) {
127         pdfArray->appendName(element);
128     }
129     attrDict->insertObject(name, std::move(pdfArray));
130     fAttrs->appendObject(std::move(attrDict));
131 }
132 
133 struct SkPDFTagNode {
134     SkPDFTagNode* fChildren = nullptr;
135     size_t fChildCount = 0;
136     struct MarkedContentInfo {
137         unsigned fPageIndex;
138         int fMarkId;
139     };
140     SkTArray<MarkedContentInfo> fMarkedContent;
141     int fNodeId;
142     SkPDF::DocumentStructureType fType;
143     SkString fTypeString;
144     SkString fAlt;
145     SkString fLang;
146     SkPDFIndirectReference fRef;
147     enum State {
148         kUnknown,
149         kYes,
150         kNo,
151     } fCanDiscard = kUnknown;
152     std::unique_ptr<SkPDFArray> fAttributes;
153 };
154 
SkPDFTagTree()155 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
156 
157 SkPDFTagTree::~SkPDFTagTree() = default;
158 
159 // static
Copy(SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)160 void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
161                         SkPDFTagNode* dst,
162                         SkArenaAlloc* arena,
163                         SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
164     nodeMap->set(node.fNodeId, dst);
165     dst->fNodeId = node.fNodeId;
166     dst->fType = node.fType;
167     dst->fTypeString = node.fTypeString;
168     dst->fAlt = node.fAlt;
169     dst->fLang = node.fLang;
170 
171     // Temporarily support both raw fChildren and fChildVector.
172     if (node.fChildren) {
173         size_t childCount = node.fChildCount;
174         SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
175         dst->fChildCount = childCount;
176         dst->fChildren = children;
177         for (size_t i = 0; i < childCount; ++i) {
178             Copy(node.fChildren[i], &children[i], arena, nodeMap);
179         }
180     } else {
181         size_t childCount = node.fChildVector.size();
182         SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
183         dst->fChildCount = childCount;
184         dst->fChildren = children;
185         for (size_t i = 0; i < childCount; ++i) {
186             Copy(*node.fChildVector[i], &children[i], arena, nodeMap);
187         }
188     }
189 
190     dst->fAttributes = std::move(node.fAttributes.fAttrs);
191 }
192 
init(SkPDF::StructureElementNode * node)193 void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
194     if (node) {
195         fRoot = fArena.make<SkPDFTagNode>();
196         Copy(*node, fRoot, &fArena, &fNodeMap);
197     }
198 }
199 
reset()200 void SkPDFTagTree::reset() {
201     fArena.reset();
202     fNodeMap.reset();
203     fMarksPerPage.reset();
204     fRoot = nullptr;
205 }
206 
getMarkIdForNodeId(int nodeId,unsigned pageIndex)207 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
208     if (!fRoot) {
209         return -1;
210     }
211     SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
212     if (!tagPtr) {
213         return -1;
214     }
215     SkPDFTagNode* tag = *tagPtr;
216     SkASSERT(tag);
217     while (fMarksPerPage.size() < pageIndex + 1) {
218         fMarksPerPage.push_back();
219     }
220     SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
221     int markId = pageMarks.count();
222     tag->fMarkedContent.push_back({pageIndex, markId});
223     pageMarks.push_back(tag);
224     return markId;
225 }
226 
can_discard(SkPDFTagNode * node)227 static bool can_discard(SkPDFTagNode* node) {
228     if (node->fCanDiscard == SkPDFTagNode::kYes) {
229         return true;
230     }
231     if (node->fCanDiscard == SkPDFTagNode::kNo) {
232         return false;
233     }
234     if (!node->fMarkedContent.empty()) {
235         node->fCanDiscard = SkPDFTagNode::kNo;
236         return false;
237     }
238     for (size_t i = 0; i < node->fChildCount; ++i) {
239         if (!can_discard(&node->fChildren[i])) {
240             node->fCanDiscard = SkPDFTagNode::kNo;
241             return false;
242         }
243     }
244     node->fCanDiscard = SkPDFTagNode::kYes;
245     return true;
246 }
247 
248 
prepare_tag_tree_to_emit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)249 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
250                                                 SkPDFTagNode* node,
251                                                 SkPDFDocument* doc) {
252     SkPDFIndirectReference ref = doc->reserveRef();
253     std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
254     SkPDFTagNode* children = node->fChildren;
255     size_t childCount = node->fChildCount;
256     for (size_t i = 0; i < childCount; ++i) {
257         SkPDFTagNode* child = &children[i];
258         if (!(can_discard(child))) {
259             kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
260         }
261     }
262     for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
263         std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
264         mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
265         mcr->insertInt("MCID", info.fMarkId);
266         kids->appendObject(std::move(mcr));
267     }
268     node->fRef = ref;
269     SkPDFDict dict("StructElem");
270     if (!node->fTypeString.isEmpty()) {
271         dict.insertName("S", node->fTypeString.c_str());
272     } else {
273         dict.insertName("S", tag_name_from_type(node->fType));
274     }
275     if (!node->fAlt.isEmpty()) {
276         dict.insertName("Alt", node->fAlt);
277     }
278     if (!node->fLang.isEmpty()) {
279         dict.insertName("Lang", node->fLang);
280     }
281     dict.insertRef("P", parent);
282     dict.insertObject("K", std::move(kids));
283     SkString idString;
284     idString.printf("%d", node->fNodeId);
285     dict.insertName("ID", idString.c_str());
286     if (node->fAttributes) {
287         dict.insertObject("A", std::move(node->fAttributes));
288     }
289 
290     return doc->emit(dict, ref);
291 }
292 
makeStructTreeRoot(SkPDFDocument * doc)293 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
294     if (!fRoot) {
295         return SkPDFIndirectReference();
296     }
297     if (can_discard(fRoot)) {
298         SkDEBUGFAIL("PDF has tag tree but no marked content.");
299     }
300     SkPDFIndirectReference ref = doc->reserveRef();
301 
302     unsigned pageCount = SkToUInt(doc->pageCount());
303 
304     // Build the StructTreeRoot.
305     SkPDFDict structTreeRoot("StructTreeRoot");
306     structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
307     structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
308 
309     // Build the parent tree, which is a mapping from the marked
310     // content IDs on each page to their corressponding tags.
311     SkPDFDict parentTree("ParentTree");
312     auto parentTreeNums = SkPDFMakeArray();
313 
314     SkASSERT(fMarksPerPage.size() <= pageCount);
315     for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
316         const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
317         SkPDFArray markToTagArray;
318         for (SkPDFTagNode* mark : pageMarks) {
319             SkASSERT(mark->fRef);
320             markToTagArray.appendRef(mark->fRef);
321         }
322         parentTreeNums->appendInt(j);
323         parentTreeNums->appendRef(doc->emit(markToTagArray));
324     }
325     parentTree.insertObject("Nums", std::move(parentTreeNums));
326     structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
327     return doc->emit(structTreeRoot, ref);
328 }
329