• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkPDFMakeToUnicodeCmap.h"
9 #include "SkPDFUtils.h"
10 #include "SkUtils.h"
11 
append_tounicode_header(SkDynamicMemoryWStream * cmap,bool multibyte)12 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
13                                     bool multibyte) {
14     // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
15     // It's there to prevent old version Adobe Readers from malfunctioning.
16     const char* kHeader =
17         "/CIDInit /ProcSet findresource begin\n"
18         "12 dict begin\n"
19         "begincmap\n";
20     cmap->writeText(kHeader);
21 
22     // The /CIDSystemInfo must be consistent to the one in
23     // SkPDFFont::populateCIDFont().
24     // We can not pass over the system info object here because the format is
25     // different. This is not a reference object.
26     const char* kSysInfo =
27         "/CIDSystemInfo\n"
28         "<<  /Registry (Adobe)\n"
29         "/Ordering (UCS)\n"
30         "/Supplement 0\n"
31         ">> def\n";
32     cmap->writeText(kSysInfo);
33 
34     // The CMapName must be consistent to /CIDSystemInfo above.
35     // /CMapType 2 means ToUnicode.
36     // Codespace range just tells the PDF processor the valid range.
37     const char* kTypeInfoHeader =
38         "/CMapName /Adobe-Identity-UCS def\n"
39         "/CMapType 2 def\n"
40         "1 begincodespacerange\n";
41     cmap->writeText(kTypeInfoHeader);
42     if (multibyte) {
43         cmap->writeText("<0000> <FFFF>\n");
44     } else {
45         cmap->writeText("<00> <FF>\n");
46     }
47     cmap->writeText("endcodespacerange\n");
48 }
49 
append_cmap_footer(SkDynamicMemoryWStream * cmap)50 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
51     const char kFooter[] =
52         "endcmap\n"
53         "CMapName currentdict /CMap defineresource pop\n"
54         "end\n"
55         "end";
56     cmap->writeText(kFooter);
57 }
58 
59 namespace {
60 struct BFChar {
61     SkGlyphID fGlyphId;
62     SkUnichar fUnicode;
63 };
64 
65 struct BFRange {
66     SkGlyphID fStart;
67     SkGlyphID fEnd;
68     SkUnichar fUnicode;
69 };
70 }  // namespace
71 
write_glyph(SkDynamicMemoryWStream * cmap,bool multiByte,SkGlyphID gid)72 static void write_glyph(SkDynamicMemoryWStream* cmap,
73                         bool multiByte,
74                         SkGlyphID gid) {
75     if (multiByte) {
76         SkPDFUtils::WriteUInt16BE(cmap, gid);
77     } else {
78         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
79     }
80 }
81 
append_bfchar_section(const SkTDArray<BFChar> & bfchar,bool multiByte,SkDynamicMemoryWStream * cmap)82 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
83                                   bool multiByte,
84                                   SkDynamicMemoryWStream* cmap) {
85     // PDF spec defines that every bf* list can have at most 100 entries.
86     for (int i = 0; i < bfchar.count(); i += 100) {
87         int count = bfchar.count() - i;
88         count = SkMin32(count, 100);
89         cmap->writeDecAsText(count);
90         cmap->writeText(" beginbfchar\n");
91         for (int j = 0; j < count; ++j) {
92             cmap->writeText("<");
93             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
94             cmap->writeText("> <");
95             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
96             cmap->writeText(">\n");
97         }
98         cmap->writeText("endbfchar\n");
99     }
100 }
101 
append_bfrange_section(const SkTDArray<BFRange> & bfrange,bool multiByte,SkDynamicMemoryWStream * cmap)102 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
103                                    bool multiByte,
104                                    SkDynamicMemoryWStream* cmap) {
105     // PDF spec defines that every bf* list can have at most 100 entries.
106     for (int i = 0; i < bfrange.count(); i += 100) {
107         int count = bfrange.count() - i;
108         count = SkMin32(count, 100);
109         cmap->writeDecAsText(count);
110         cmap->writeText(" beginbfrange\n");
111         for (int j = 0; j < count; ++j) {
112             cmap->writeText("<");
113             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
114             cmap->writeText("> <");
115             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
116             cmap->writeText("> <");
117             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
118             cmap->writeText(">\n");
119         }
120         cmap->writeText("endbfrange\n");
121     }
122 }
123 
124 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
125 // Technote 5014.
126 // The function is not static so we can test it in unit tests.
127 //
128 // Current implementation guarantees bfchar and bfrange entries do not overlap.
129 //
130 // Current implementation does not attempt aggressive optimizations against
131 // the following case because the specification is not clear.
132 //
133 // 4 beginbfchar          1 beginbfchar
134 // <0003> <0013>          <0020> <0014>
135 // <0005> <0015>    to    endbfchar
136 // <0007> <0017>          1 beginbfrange
137 // <0020> <0014>          <0003> <0007> <0013>
138 // endbfchar              endbfrange
139 //
140 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
141 // overlap, but succeeding maps supersede preceding maps."
142 //
143 // In case of searching text in PDF, bfrange will have higher precedence so
144 // typing char id 0x0014 in search box will get glyph id 0x0004 first.  However,
145 // the spec does not mention how this kind of conflict will be resolved.
146 //
147 // For the worst case (having 65536 continuous unicode and we use every other
148 // one of them), the possible savings by aggressive optimization is 416KB
149 // pre-compressed and does not provide enough motivation for implementation.
SkPDFAppendCmapSections(const SkTDArray<SkUnichar> & glyphToUnicode,const SkBitSet * subset,SkDynamicMemoryWStream * cmap,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)150 void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
151                              const SkBitSet* subset,
152                              SkDynamicMemoryWStream* cmap,
153                              bool multiByteGlyphs,
154                              SkGlyphID firstGlyphID,
155                              SkGlyphID lastGlyphID) {
156     if (glyphToUnicode.isEmpty()) {
157         return;
158     }
159     int glyphOffset = 0;
160     if (!multiByteGlyphs) {
161         glyphOffset = firstGlyphID - 1;
162     }
163 
164     SkTDArray<BFChar> bfcharEntries;
165     SkTDArray<BFRange> bfrangeEntries;
166 
167     BFRange currentRangeEntry = {0, 0, 0};
168     bool rangeEmpty = true;
169     const int limit =
170             SkMin32(lastGlyphID + 1, glyphToUnicode.count()) - glyphOffset;
171 
172     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
173         bool inSubset = i < limit &&
174                         (subset == nullptr || subset->has(i + glyphOffset));
175         if (!rangeEmpty) {
176             // PDF spec requires bfrange not changing the higher byte,
177             // e.g. <1035> <10FF> <2222> is ok, but
178             //      <1035> <1100> <2222> is no good
179             bool inRange =
180                 i == currentRangeEntry.fEnd + 1 &&
181                 i >> 8 == currentRangeEntry.fStart >> 8 &&
182                 i < limit &&
183                 glyphToUnicode[i + glyphOffset] ==
184                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
185             if (!inSubset || !inRange) {
186                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
187                     bfrangeEntries.push(currentRangeEntry);
188                 } else {
189                     BFChar* entry = bfcharEntries.append();
190                     entry->fGlyphId = currentRangeEntry.fStart;
191                     entry->fUnicode = currentRangeEntry.fUnicode;
192                 }
193                 rangeEmpty = true;
194             }
195         }
196         if (inSubset) {
197             currentRangeEntry.fEnd = i;
198             if (rangeEmpty) {
199               currentRangeEntry.fStart = i;
200               currentRangeEntry.fUnicode = glyphToUnicode[i + glyphOffset];
201               rangeEmpty = false;
202             }
203         }
204     }
205 
206     // The spec requires all bfchar entries for a font must come before bfrange
207     // entries.
208     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
209     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
210 }
211 
SkPDFMakeToUnicodeCmap(const SkTDArray<SkUnichar> & glyphToUnicode,const SkBitSet * subset,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)212 sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
213         const SkTDArray<SkUnichar>& glyphToUnicode,
214         const SkBitSet* subset,
215         bool multiByteGlyphs,
216         SkGlyphID firstGlyphID,
217         SkGlyphID lastGlyphID) {
218     SkDynamicMemoryWStream cmap;
219     append_tounicode_header(&cmap, multiByteGlyphs);
220     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
221                             firstGlyphID, lastGlyphID);
222     append_cmap_footer(&cmap);
223     return sk_make_sp<SkPDFStream>(
224             std::unique_ptr<SkStreamAsset>(cmap.detachAsStream()));
225 }
226