1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/pdf/SkPDFDocumentPriv.h"
9 #include "src/pdf/SkPDFTag.h"
10
11 // Table 333 in PDF 32000-1:2008
tag_name_from_type(SkPDF::DocumentStructureType type)12 static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
13 switch (type) {
14 #define M(X) case SkPDF::DocumentStructureType::k ## X: return #X
15 M(Document);
16 M(Part);
17 M(Art);
18 M(Sect);
19 M(Div);
20 M(BlockQuote);
21 M(Caption);
22 M(TOC);
23 M(TOCI);
24 M(Index);
25 M(NonStruct);
26 M(Private);
27 M(H);
28 M(H1);
29 M(H2);
30 M(H3);
31 M(H4);
32 M(H5);
33 M(H6);
34 M(P);
35 M(L);
36 M(LI);
37 M(Lbl);
38 M(LBody);
39 M(Table);
40 M(TR);
41 M(TH);
42 M(TD);
43 M(THead);
44 M(TBody);
45 M(TFoot);
46 M(Span);
47 M(Quote);
48 M(Note);
49 M(Reference);
50 M(BibEntry);
51 M(Code);
52 M(Link);
53 M(Annot);
54 M(Ruby);
55 M(RB);
56 M(RT);
57 M(RP);
58 M(Warichu);
59 M(WT);
60 M(WP);
61 M(Figure);
62 M(Formula);
63 M(Form);
64 #undef M
65 }
66 SK_ABORT("bad tag");
67 }
68
69 SkPDF::AttributeList::AttributeList() = default;
70
71 SkPDF::AttributeList::~AttributeList() = default;
72
appendInt(const char * owner,const char * name,int value)73 void SkPDF::AttributeList::appendInt(
74 const char* owner, const char* name, int value) {
75 if (!fAttrs)
76 fAttrs = SkPDFMakeArray();
77 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
78 attrDict->insertName("O", owner);
79 attrDict->insertInt(name, value);
80 fAttrs->appendObject(std::move(attrDict));
81 }
82
appendFloat(const char * owner,const char * name,float value)83 void SkPDF::AttributeList::appendFloat(
84 const char* owner, const char* name, float value) {
85 if (!fAttrs)
86 fAttrs = SkPDFMakeArray();
87 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
88 attrDict->insertName("O", owner);
89 attrDict->insertScalar(name, value);
90 fAttrs->appendObject(std::move(attrDict));
91 }
92
appendString(const char * owner,const char * name,const char * value)93 void SkPDF::AttributeList::appendString(
94 const char* owner, const char* name, const char* value) {
95 if (!fAttrs)
96 fAttrs = SkPDFMakeArray();
97 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
98 attrDict->insertName("O", owner);
99 attrDict->insertName(name, value);
100 fAttrs->appendObject(std::move(attrDict));
101 }
102
appendFloatArray(const char * owner,const char * name,const std::vector<float> & value)103 void SkPDF::AttributeList::appendFloatArray(
104 const char* owner, const char* name, const std::vector<float>& value) {
105 if (!fAttrs)
106 fAttrs = SkPDFMakeArray();
107 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
108 attrDict->insertName("O", owner);
109 std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
110 for (float element : value) {
111 pdfArray->appendScalar(element);
112 }
113 attrDict->insertObject(name, std::move(pdfArray));
114 fAttrs->appendObject(std::move(attrDict));
115 }
116
appendStringArray(const char * owner,const char * name,const std::vector<SkString> & value)117 void SkPDF::AttributeList::appendStringArray(
118 const char* owner,
119 const char* name,
120 const std::vector<SkString>& value) {
121 if (!fAttrs)
122 fAttrs = SkPDFMakeArray();
123 std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
124 attrDict->insertName("O", owner);
125 std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
126 for (SkString element : value) {
127 pdfArray->appendName(element);
128 }
129 attrDict->insertObject(name, std::move(pdfArray));
130 fAttrs->appendObject(std::move(attrDict));
131 }
132
133 struct SkPDFTagNode {
134 SkPDFTagNode* fChildren = nullptr;
135 size_t fChildCount = 0;
136 struct MarkedContentInfo {
137 unsigned fPageIndex;
138 int fMarkId;
139 };
140 SkTArray<MarkedContentInfo> fMarkedContent;
141 int fNodeId;
142 SkPDF::DocumentStructureType fType;
143 SkString fTypeString;
144 SkString fAlt;
145 SkString fLang;
146 SkPDFIndirectReference fRef;
147 enum State {
148 kUnknown,
149 kYes,
150 kNo,
151 } fCanDiscard = kUnknown;
152 std::unique_ptr<SkPDFArray> fAttributes;
153 };
154
SkPDFTagTree()155 SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
156
157 SkPDFTagTree::~SkPDFTagTree() = default;
158
159 // static
Copy(SkPDF::StructureElementNode & node,SkPDFTagNode * dst,SkArenaAlloc * arena,SkTHashMap<int,SkPDFTagNode * > * nodeMap)160 void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
161 SkPDFTagNode* dst,
162 SkArenaAlloc* arena,
163 SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
164 nodeMap->set(node.fNodeId, dst);
165 dst->fNodeId = node.fNodeId;
166 dst->fType = node.fType;
167 dst->fTypeString = node.fTypeString;
168 dst->fAlt = node.fAlt;
169 dst->fLang = node.fLang;
170
171 // Temporarily support both raw fChildren and fChildVector.
172 if (node.fChildren) {
173 size_t childCount = node.fChildCount;
174 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
175 dst->fChildCount = childCount;
176 dst->fChildren = children;
177 for (size_t i = 0; i < childCount; ++i) {
178 Copy(node.fChildren[i], &children[i], arena, nodeMap);
179 }
180 } else {
181 size_t childCount = node.fChildVector.size();
182 SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
183 dst->fChildCount = childCount;
184 dst->fChildren = children;
185 for (size_t i = 0; i < childCount; ++i) {
186 Copy(*node.fChildVector[i], &children[i], arena, nodeMap);
187 }
188 }
189
190 dst->fAttributes = std::move(node.fAttributes.fAttrs);
191 }
192
init(SkPDF::StructureElementNode * node)193 void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
194 if (node) {
195 fRoot = fArena.make<SkPDFTagNode>();
196 Copy(*node, fRoot, &fArena, &fNodeMap);
197 }
198 }
199
reset()200 void SkPDFTagTree::reset() {
201 fArena.reset();
202 fNodeMap.reset();
203 fMarksPerPage.reset();
204 fRoot = nullptr;
205 }
206
getMarkIdForNodeId(int nodeId,unsigned pageIndex)207 int SkPDFTagTree::getMarkIdForNodeId(int nodeId, unsigned pageIndex) {
208 if (!fRoot) {
209 return -1;
210 }
211 SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
212 if (!tagPtr) {
213 return -1;
214 }
215 SkPDFTagNode* tag = *tagPtr;
216 SkASSERT(tag);
217 while (fMarksPerPage.size() < pageIndex + 1) {
218 fMarksPerPage.push_back();
219 }
220 SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
221 int markId = pageMarks.count();
222 tag->fMarkedContent.push_back({pageIndex, markId});
223 pageMarks.push_back(tag);
224 return markId;
225 }
226
can_discard(SkPDFTagNode * node)227 static bool can_discard(SkPDFTagNode* node) {
228 if (node->fCanDiscard == SkPDFTagNode::kYes) {
229 return true;
230 }
231 if (node->fCanDiscard == SkPDFTagNode::kNo) {
232 return false;
233 }
234 if (!node->fMarkedContent.empty()) {
235 node->fCanDiscard = SkPDFTagNode::kNo;
236 return false;
237 }
238 for (size_t i = 0; i < node->fChildCount; ++i) {
239 if (!can_discard(&node->fChildren[i])) {
240 node->fCanDiscard = SkPDFTagNode::kNo;
241 return false;
242 }
243 }
244 node->fCanDiscard = SkPDFTagNode::kYes;
245 return true;
246 }
247
248
prepare_tag_tree_to_emit(SkPDFIndirectReference parent,SkPDFTagNode * node,SkPDFDocument * doc)249 SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
250 SkPDFTagNode* node,
251 SkPDFDocument* doc) {
252 SkPDFIndirectReference ref = doc->reserveRef();
253 std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
254 SkPDFTagNode* children = node->fChildren;
255 size_t childCount = node->fChildCount;
256 for (size_t i = 0; i < childCount; ++i) {
257 SkPDFTagNode* child = &children[i];
258 if (!(can_discard(child))) {
259 kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
260 }
261 }
262 for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
263 std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
264 mcr->insertRef("Pg", doc->getPage(info.fPageIndex));
265 mcr->insertInt("MCID", info.fMarkId);
266 kids->appendObject(std::move(mcr));
267 }
268 node->fRef = ref;
269 SkPDFDict dict("StructElem");
270 if (!node->fTypeString.isEmpty()) {
271 dict.insertName("S", node->fTypeString.c_str());
272 } else {
273 dict.insertName("S", tag_name_from_type(node->fType));
274 }
275 if (!node->fAlt.isEmpty()) {
276 dict.insertName("Alt", node->fAlt);
277 }
278 if (!node->fLang.isEmpty()) {
279 dict.insertName("Lang", node->fLang);
280 }
281 dict.insertRef("P", parent);
282 dict.insertObject("K", std::move(kids));
283 SkString idString;
284 idString.printf("%d", node->fNodeId);
285 dict.insertName("ID", idString.c_str());
286 if (node->fAttributes) {
287 dict.insertObject("A", std::move(node->fAttributes));
288 }
289
290 return doc->emit(dict, ref);
291 }
292
makeStructTreeRoot(SkPDFDocument * doc)293 SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
294 if (!fRoot) {
295 return SkPDFIndirectReference();
296 }
297 if (can_discard(fRoot)) {
298 SkDEBUGFAIL("PDF has tag tree but no marked content.");
299 }
300 SkPDFIndirectReference ref = doc->reserveRef();
301
302 unsigned pageCount = SkToUInt(doc->pageCount());
303
304 // Build the StructTreeRoot.
305 SkPDFDict structTreeRoot("StructTreeRoot");
306 structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
307 structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
308
309 // Build the parent tree, which is a mapping from the marked
310 // content IDs on each page to their corressponding tags.
311 SkPDFDict parentTree("ParentTree");
312 auto parentTreeNums = SkPDFMakeArray();
313
314 SkASSERT(fMarksPerPage.size() <= pageCount);
315 for (size_t j = 0; j < fMarksPerPage.size(); ++j) {
316 const SkTArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
317 SkPDFArray markToTagArray;
318 for (SkPDFTagNode* mark : pageMarks) {
319 SkASSERT(mark->fRef);
320 markToTagArray.appendRef(mark->fRef);
321 }
322 parentTreeNums->appendInt(j);
323 parentTreeNums->appendRef(doc->emit(markToTagArray));
324 }
325 parentTree.insertObject("Nums", std::move(parentTreeNums));
326 structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
327 return doc->emit(structTreeRoot, ref);
328 }
329