1 /*
2 * Copyright 2011 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9
10 #include "include/core/SkStream.h"
11 #include "include/private/base/SkTo.h"
12 #include "src/base/SkUTF.h"
13 #include "src/pdf/SkPDFGlyphUse.h"
14 #include "src/pdf/SkPDFUtils.h"
15
16 #include <algorithm>
17 #include <cstddef>
18 #include <vector>
19
append_tounicode_header(SkDynamicMemoryWStream * cmap,bool multibyte)20 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
21 bool multibyte) {
22 // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
23 // It's there to prevent old version Adobe Readers from malfunctioning.
24 const char* kHeader =
25 "/CIDInit /ProcSet findresource begin\n"
26 "12 dict begin\n"
27 "begincmap\n";
28 cmap->writeText(kHeader);
29
30 // The /CIDSystemInfo must be consistent to the one in
31 // SkPDFFont::populateCIDFont().
32 // We can not pass over the system info object here because the format is
33 // different. This is not a reference object.
34 const char* kSysInfo =
35 "/CIDSystemInfo\n"
36 "<< /Registry (Adobe)\n"
37 "/Ordering (UCS)\n"
38 "/Supplement 0\n"
39 ">> def\n";
40 cmap->writeText(kSysInfo);
41
42 // The CMapName must be consistent to /CIDSystemInfo above.
43 // /CMapType 2 means ToUnicode.
44 // Codespace range just tells the PDF processor the valid range.
45 const char* kTypeInfoHeader =
46 "/CMapName /Adobe-Identity-UCS def\n"
47 "/CMapType 2 def\n"
48 "1 begincodespacerange\n";
49 cmap->writeText(kTypeInfoHeader);
50 if (multibyte) {
51 cmap->writeText("<0000> <FFFF>\n");
52 } else {
53 cmap->writeText("<00> <FF>\n");
54 }
55 cmap->writeText("endcodespacerange\n");
56 }
57
append_cmap_footer(SkDynamicMemoryWStream * cmap)58 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
59 const char kFooter[] =
60 "endcmap\n"
61 "CMapName currentdict /CMap defineresource pop\n"
62 "end\n"
63 "end";
64 cmap->writeText(kFooter);
65 }
66
67 namespace {
68 struct BFChar {
69 SkGlyphID fGlyphId;
70 SkUnichar fUnicode;
71 };
72
73 struct BFRange {
74 SkGlyphID fStart;
75 SkGlyphID fEnd;
76 SkUnichar fUnicode;
77 };
78 } // namespace
79
write_glyph(SkDynamicMemoryWStream * cmap,bool multiByte,SkGlyphID gid)80 static void write_glyph(SkDynamicMemoryWStream* cmap,
81 bool multiByte,
82 SkGlyphID gid) {
83 if (multiByte) {
84 SkPDFUtils::WriteUInt16BE(cmap, gid);
85 } else {
86 SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
87 }
88 }
89
append_bfchar_section(const std::vector<BFChar> & bfchar,bool multiByte,SkDynamicMemoryWStream * cmap)90 static void append_bfchar_section(const std::vector<BFChar>& bfchar,
91 bool multiByte,
92 SkDynamicMemoryWStream* cmap) {
93 // PDF spec defines that every bf* list can have at most 100 entries.
94 for (size_t i = 0; i < bfchar.size(); i += 100) {
95 int count = SkToInt(bfchar.size() - i);
96 count = std::min(count, 100);
97 cmap->writeDecAsText(count);
98 cmap->writeText(" beginbfchar\n");
99 for (int j = 0; j < count; ++j) {
100 cmap->writeText("<");
101 write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
102 cmap->writeText("> <");
103 SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
104 cmap->writeText(">\n");
105 }
106 cmap->writeText("endbfchar\n");
107 }
108 }
109
append_bfrange_section(const std::vector<BFRange> & bfrange,bool multiByte,SkDynamicMemoryWStream * cmap)110 static void append_bfrange_section(const std::vector<BFRange>& bfrange,
111 bool multiByte,
112 SkDynamicMemoryWStream* cmap) {
113 // PDF spec defines that every bf* list can have at most 100 entries.
114 for (size_t i = 0; i < bfrange.size(); i += 100) {
115 int count = SkToInt(bfrange.size() - i);
116 count = std::min(count, 100);
117 cmap->writeDecAsText(count);
118 cmap->writeText(" beginbfrange\n");
119 for (int j = 0; j < count; ++j) {
120 cmap->writeText("<");
121 write_glyph(cmap, multiByte, bfrange[i + j].fStart);
122 cmap->writeText("> <");
123 write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
124 cmap->writeText("> <");
125 SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
126 cmap->writeText(">\n");
127 }
128 cmap->writeText("endbfrange\n");
129 }
130 }
131
132 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
133 // Technote 5014.
134 // The function is not static so we can test it in unit tests.
135 //
136 // Current implementation guarantees bfchar and bfrange entries do not overlap.
137 //
138 // Current implementation does not attempt aggressive optimizations against
139 // following case because the specification is not clear.
140 //
141 // 4 beginbfchar 1 beginbfchar
142 // <0003> <0013> <0020> <0014>
143 // <0005> <0015> to endbfchar
144 // <0007> <0017> 1 beginbfrange
145 // <0020> <0014> <0003> <0007> <0013>
146 // endbfchar endbfrange
147 //
148 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
149 // overlap, but succeeding maps supersede preceding maps."
150 //
151 // In case of searching text in PDF, bfrange will have higher precedence so
152 // typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
153 // the spec does not mention how will this kind of conflict being resolved.
154 //
155 // For the worst case (having 65536 continuous unicode and we use every other
156 // one of them), the possible savings by aggressive optimization is 416KB
157 // pre-compressed and does not provide enough motivation for implementation.
SkPDFAppendCmapSections(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,SkDynamicMemoryWStream * cmap,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)158 void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
159 const SkPDFGlyphUse* subset,
160 SkDynamicMemoryWStream* cmap,
161 bool multiByteGlyphs,
162 SkGlyphID firstGlyphID,
163 SkGlyphID lastGlyphID) {
164 int glyphOffset = 0;
165 if (!multiByteGlyphs) {
166 glyphOffset = firstGlyphID - 1;
167 }
168
169 std::vector<BFChar> bfcharEntries;
170 std::vector<BFRange> bfrangeEntries;
171
172 BFRange currentRangeEntry = {0, 0, 0};
173 bool rangeEmpty = true;
174 const int limit = (int)lastGlyphID + 1 - glyphOffset;
175
176 for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
177 SkGlyphID gid = i + glyphOffset;
178 bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
179 if (!rangeEmpty) {
180 // PDF spec requires bfrange not changing the higher byte,
181 // e.g. <1035> <10FF> <2222> is ok, but
182 // <1035> <1100> <2222> is no good
183 bool inRange =
184 i == currentRangeEntry.fEnd + 1 &&
185 i >> 8 == currentRangeEntry.fStart >> 8 &&
186 i < limit &&
187 glyphToUnicode[gid] ==
188 currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
189 if (!inSubset || !inRange) {
190 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
191 bfrangeEntries.push_back(currentRangeEntry);
192 } else {
193 bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
194 }
195 rangeEmpty = true;
196 }
197 }
198 if (inSubset) {
199 currentRangeEntry.fEnd = i;
200 if (rangeEmpty) {
201 currentRangeEntry.fStart = i;
202 currentRangeEntry.fUnicode = glyphToUnicode[gid];
203 rangeEmpty = false;
204 }
205 }
206 }
207
208 // The spec requires all bfchar entries for a font must come before bfrange
209 // entries.
210 append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
211 append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
212 }
213
SkPDFMakeToUnicodeCmap(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)214 std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
215 const SkUnichar* glyphToUnicode,
216 const SkPDFGlyphUse* subset,
217 bool multiByteGlyphs,
218 SkGlyphID firstGlyphID,
219 SkGlyphID lastGlyphID) {
220 SkDynamicMemoryWStream cmap;
221 append_tounicode_header(&cmap, multiByteGlyphs);
222 SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
223 firstGlyphID, lastGlyphID);
224 append_cmap_footer(&cmap);
225 return cmap.detachAsStream();
226 }
227