• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9 
10 #include "include/core/SkStream.h"
11 #include "include/private/base/SkTo.h"
12 #include "src/base/SkUTF.h"
13 #include "src/pdf/SkPDFGlyphUse.h"
14 #include "src/pdf/SkPDFUtils.h"
15 
16 #include <algorithm>
17 #include <cstddef>
18 #include <vector>
19 
append_tounicode_header(SkDynamicMemoryWStream * cmap,bool multibyte)20 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
21                                     bool multibyte) {
22     // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
23     // It's there to prevent old version Adobe Readers from malfunctioning.
24     const char* kHeader =
25         "/CIDInit /ProcSet findresource begin\n"
26         "12 dict begin\n"
27         "begincmap\n";
28     cmap->writeText(kHeader);
29 
30     // The /CIDSystemInfo must be consistent to the one in
31     // SkPDFFont::populateCIDFont().
32     // We can not pass over the system info object here because the format is
33     // different. This is not a reference object.
34     const char* kSysInfo =
35         "/CIDSystemInfo\n"
36         "<<  /Registry (Adobe)\n"
37         "/Ordering (UCS)\n"
38         "/Supplement 0\n"
39         ">> def\n";
40     cmap->writeText(kSysInfo);
41 
42     // The CMapName must be consistent to /CIDSystemInfo above.
43     // /CMapType 2 means ToUnicode.
44     // Codespace range just tells the PDF processor the valid range.
45     const char* kTypeInfoHeader =
46         "/CMapName /Adobe-Identity-UCS def\n"
47         "/CMapType 2 def\n"
48         "1 begincodespacerange\n";
49     cmap->writeText(kTypeInfoHeader);
50     if (multibyte) {
51         cmap->writeText("<0000> <FFFF>\n");
52     } else {
53         cmap->writeText("<00> <FF>\n");
54     }
55     cmap->writeText("endcodespacerange\n");
56 }
57 
append_cmap_footer(SkDynamicMemoryWStream * cmap)58 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
59     const char kFooter[] =
60         "endcmap\n"
61         "CMapName currentdict /CMap defineresource pop\n"
62         "end\n"
63         "end";
64     cmap->writeText(kFooter);
65 }
66 
67 namespace {
68 struct BFChar {
69     SkGlyphID fGlyphId;
70     SkUnichar fUnicode;
71 };
72 
73 struct BFRange {
74     SkGlyphID fStart;
75     SkGlyphID fEnd;
76     SkUnichar fUnicode;
77 };
78 }  // namespace
79 
write_glyph(SkDynamicMemoryWStream * cmap,bool multiByte,SkGlyphID gid)80 static void write_glyph(SkDynamicMemoryWStream* cmap,
81                         bool multiByte,
82                         SkGlyphID gid) {
83     if (multiByte) {
84         SkPDFUtils::WriteUInt16BE(cmap, gid);
85     } else {
86         SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
87     }
88 }
89 
append_bfchar_section(const std::vector<BFChar> & bfchar,bool multiByte,SkDynamicMemoryWStream * cmap)90 static void append_bfchar_section(const std::vector<BFChar>& bfchar,
91                                   bool multiByte,
92                                   SkDynamicMemoryWStream* cmap) {
93     // PDF spec defines that every bf* list can have at most 100 entries.
94     for (size_t i = 0; i < bfchar.size(); i += 100) {
95         int count = SkToInt(bfchar.size() - i);
96         count = std::min(count, 100);
97         cmap->writeDecAsText(count);
98         cmap->writeText(" beginbfchar\n");
99         for (int j = 0; j < count; ++j) {
100             cmap->writeText("<");
101             write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
102             cmap->writeText("> <");
103             SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode);
104             cmap->writeText(">\n");
105         }
106         cmap->writeText("endbfchar\n");
107     }
108 }
109 
append_bfrange_section(const std::vector<BFRange> & bfrange,bool multiByte,SkDynamicMemoryWStream * cmap)110 static void append_bfrange_section(const std::vector<BFRange>& bfrange,
111                                    bool multiByte,
112                                    SkDynamicMemoryWStream* cmap) {
113     // PDF spec defines that every bf* list can have at most 100 entries.
114     for (size_t i = 0; i < bfrange.size(); i += 100) {
115         int count = SkToInt(bfrange.size() - i);
116         count = std::min(count, 100);
117         cmap->writeDecAsText(count);
118         cmap->writeText(" beginbfrange\n");
119         for (int j = 0; j < count; ++j) {
120             cmap->writeText("<");
121             write_glyph(cmap, multiByte, bfrange[i + j].fStart);
122             cmap->writeText("> <");
123             write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
124             cmap->writeText("> <");
125             SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode);
126             cmap->writeText(">\n");
127         }
128         cmap->writeText("endbfrange\n");
129     }
130 }
131 
132 // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
133 // Technote 5014.
134 // The function is not static so we can test it in unit tests.
135 //
136 // Current implementation guarantees bfchar and bfrange entries do not overlap.
137 //
138 // Current implementation does not attempt aggressive optimizations against
139 // following case because the specification is not clear.
140 //
141 // 4 beginbfchar          1 beginbfchar
142 // <0003> <0013>          <0020> <0014>
143 // <0005> <0015>    to    endbfchar
144 // <0007> <0017>          1 beginbfrange
145 // <0020> <0014>          <0003> <0007> <0013>
146 // endbfchar              endbfrange
147 //
148 // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
149 // overlap, but succeeding maps supersede preceding maps."
150 //
151 // In case of searching text in PDF, bfrange will have higher precedence so
152 // typing char id 0x0014 in search box will get glyph id 0x0004 first.  However,
153 // the spec does not mention how will this kind of conflict being resolved.
154 //
155 // For the worst case (having 65536 continuous unicode and we use every other
156 // one of them), the possible savings by aggressive optimization is 416KB
157 // pre-compressed and does not provide enough motivation for implementation.
SkPDFAppendCmapSections(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,SkDynamicMemoryWStream * cmap,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)158 void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
159                              const SkPDFGlyphUse* subset,
160                              SkDynamicMemoryWStream* cmap,
161                              bool multiByteGlyphs,
162                              SkGlyphID firstGlyphID,
163                              SkGlyphID lastGlyphID) {
164     int glyphOffset = 0;
165     if (!multiByteGlyphs) {
166         glyphOffset = firstGlyphID - 1;
167     }
168 
169     std::vector<BFChar> bfcharEntries;
170     std::vector<BFRange> bfrangeEntries;
171 
172     BFRange currentRangeEntry = {0, 0, 0};
173     bool rangeEmpty = true;
174     const int limit = (int)lastGlyphID + 1 - glyphOffset;
175 
176     for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) {
177         SkGlyphID gid = i + glyphOffset;
178         bool inSubset = i < limit && (subset == nullptr || subset->has(gid));
179         if (!rangeEmpty) {
180             // PDF spec requires bfrange not changing the higher byte,
181             // e.g. <1035> <10FF> <2222> is ok, but
182             //      <1035> <1100> <2222> is no good
183             bool inRange =
184                 i == currentRangeEntry.fEnd + 1 &&
185                 i >> 8 == currentRangeEntry.fStart >> 8 &&
186                 i < limit &&
187                 glyphToUnicode[gid] ==
188                     currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
189             if (!inSubset || !inRange) {
190                 if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
191                     bfrangeEntries.push_back(currentRangeEntry);
192                 } else {
193                     bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
194                 }
195                 rangeEmpty = true;
196             }
197         }
198         if (inSubset) {
199             currentRangeEntry.fEnd = i;
200             if (rangeEmpty) {
201               currentRangeEntry.fStart = i;
202               currentRangeEntry.fUnicode = glyphToUnicode[gid];
203               rangeEmpty = false;
204             }
205         }
206     }
207 
208     // The spec requires all bfchar entries for a font must come before bfrange
209     // entries.
210     append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
211     append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
212 }
213 
SkPDFMakeToUnicodeCmap(const SkUnichar * glyphToUnicode,const SkPDFGlyphUse * subset,bool multiByteGlyphs,SkGlyphID firstGlyphID,SkGlyphID lastGlyphID)214 std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
215         const SkUnichar* glyphToUnicode,
216         const SkPDFGlyphUse* subset,
217         bool multiByteGlyphs,
218         SkGlyphID firstGlyphID,
219         SkGlyphID lastGlyphID) {
220     SkDynamicMemoryWStream cmap;
221     append_tounicode_header(&cmap, multiByteGlyphs);
222     SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
223                             firstGlyphID, lastGlyphID);
224     append_cmap_footer(&cmap);
225     return cmap.detachAsStream();
226 }
227