• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cidfont.h"
8 
9 #include <algorithm>
10 #include <limits>
11 #include <utility>
12 #include <vector>
13 
14 #include "build/build_config.h"
15 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
16 #include "core/fpdfapi/font/cfx_cttgsubtable.h"
17 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
18 #include "core/fpdfapi/font/cpdf_cmap.h"
19 #include "core/fpdfapi/font/cpdf_cmapparser.h"
20 #include "core/fpdfapi/font/cpdf_fontencoding.h"
21 #include "core/fpdfapi/font/cpdf_fontglobals.h"
22 #include "core/fpdfapi/parser/cpdf_array.h"
23 #include "core/fpdfapi/parser/cpdf_dictionary.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
26 #include "core/fxcrt/fx_codepage.h"
27 #include "core/fxcrt/fx_memory.h"
28 #include "core/fxcrt/fx_safe_types.h"
29 #include "core/fxcrt/fx_unicode.h"
30 #include "core/fxcrt/stl_util.h"
31 #include "core/fxge/fx_font.h"
32 #include "third_party/base/check.h"
33 #include "third_party/base/check_op.h"
34 #include "third_party/base/cxx17_backports.h"
35 #include "third_party/base/span.h"
36 
37 namespace {
38 
// Code page associated with each slot of a CIDSET_NUM_SETS-sized table.
// LoadSubstFont() indexes this array with the font's CIDSet; the
// Windows-only conversion paths in GetUnicodeFromCharCode() and
// CharCodeFromUnicode() index it with the CMap's CIDCoding value instead.
constexpr FX_CodePage kCharsetCodePages[CIDSET_NUM_SETS] = {
    FX_CodePage::kDefANSI,
    FX_CodePage::kChineseSimplified,
    FX_CodePage::kChineseTraditional,
    FX_CodePage::kShiftJIS,
    FX_CodePage::kHangul,
    FX_CodePage::kUTF16LE};
46 
// One row of kJapan1VerticalCIDs: a CID plus six byte-encoded matrix
// components. GetCIDTransform() returns the address of `a`, and GetCharBBox()
// indexes that pointer as [0]..[5], decoding each byte with
// CIDTransformToFloat(). The order of `a`..`f` must therefore stay as is.
struct CIDTransform {
  uint16_t cid;  // Lookup key used by GetCIDTransform().
  uint8_t a;     // Encoded matrix component, read as pTransform[0].
  uint8_t b;     // Read as pTransform[1].
  uint8_t c;     // Read as pTransform[2].
  uint8_t d;     // Read as pTransform[3].
  uint8_t e;     // Read as pTransform[4]; scaled by 1000 in GetCharBBox().
  uint8_t f;     // Read as pTransform[5]; scaled by 1000 in GetCharBBox().
};
56 
// Per-CID transforms consulted by GetCIDTransform() for the Japan1 charset
// when no font file is present. Rows are listed in ascending `cid` order,
// which GetCIDTransform() depends on because it searches this table with
// std::lower_bound(). Keep any new row in sorted position.
constexpr CIDTransform kJapan1VerticalCIDs[] = {
    {97, 129, 0, 0, 127, 55, 0},     {7887, 127, 0, 0, 127, 76, 89},
    {7888, 127, 0, 0, 127, 79, 94},  {7889, 0, 129, 127, 0, 17, 127},
    {7890, 0, 129, 127, 0, 17, 127}, {7891, 0, 129, 127, 0, 17, 127},
    {7892, 0, 129, 127, 0, 17, 127}, {7893, 0, 129, 127, 0, 17, 127},
    {7894, 0, 129, 127, 0, 17, 127}, {7895, 0, 129, 127, 0, 17, 127},
    {7896, 0, 129, 127, 0, 17, 127}, {7897, 0, 129, 127, 0, 17, 127},
    {7898, 0, 129, 127, 0, 17, 127}, {7899, 0, 129, 127, 0, 17, 104},
    {7900, 0, 129, 127, 0, 17, 127}, {7901, 0, 129, 127, 0, 17, 104},
    {7902, 0, 129, 127, 0, 17, 127}, {7903, 0, 129, 127, 0, 17, 127},
    {7904, 0, 129, 127, 0, 17, 127}, {7905, 0, 129, 127, 0, 17, 114},
    {7906, 0, 129, 127, 0, 17, 127}, {7907, 0, 129, 127, 0, 17, 127},
    {7908, 0, 129, 127, 0, 17, 127}, {7909, 0, 129, 127, 0, 17, 127},
    {7910, 0, 129, 127, 0, 17, 127}, {7911, 0, 129, 127, 0, 17, 127},
    {7912, 0, 129, 127, 0, 17, 127}, {7913, 0, 129, 127, 0, 17, 127},
    {7914, 0, 129, 127, 0, 17, 127}, {7915, 0, 129, 127, 0, 17, 114},
    {7916, 0, 129, 127, 0, 17, 127}, {7917, 0, 129, 127, 0, 17, 127},
    {7918, 127, 0, 0, 127, 18, 25},  {7919, 127, 0, 0, 127, 18, 25},
    {7920, 127, 0, 0, 127, 18, 25},  {7921, 127, 0, 0, 127, 18, 25},
    {7922, 127, 0, 0, 127, 18, 25},  {7923, 127, 0, 0, 127, 18, 25},
    {7924, 127, 0, 0, 127, 18, 25},  {7925, 127, 0, 0, 127, 18, 25},
    {7926, 127, 0, 0, 127, 18, 25},  {7927, 127, 0, 0, 127, 18, 25},
    {7928, 127, 0, 0, 127, 18, 25},  {7929, 127, 0, 0, 127, 18, 25},
    {7930, 127, 0, 0, 127, 18, 25},  {7931, 127, 0, 0, 127, 18, 25},
    {7932, 127, 0, 0, 127, 18, 25},  {7933, 127, 0, 0, 127, 18, 25},
    {7934, 127, 0, 0, 127, 18, 25},  {7935, 127, 0, 0, 127, 18, 25},
    {7936, 127, 0, 0, 127, 18, 25},  {7937, 127, 0, 0, 127, 18, 25},
    {7938, 127, 0, 0, 127, 18, 25},  {7939, 127, 0, 0, 127, 18, 25},
    {8720, 0, 129, 127, 0, 19, 102}, {8721, 0, 129, 127, 0, 13, 127},
    {8722, 0, 129, 127, 0, 19, 108}, {8723, 0, 129, 127, 0, 19, 102},
    {8724, 0, 129, 127, 0, 19, 102}, {8725, 0, 129, 127, 0, 19, 102},
    {8726, 0, 129, 127, 0, 19, 102}, {8727, 0, 129, 127, 0, 19, 102},
    {8728, 0, 129, 127, 0, 19, 114}, {8729, 0, 129, 127, 0, 19, 114},
    {8730, 0, 129, 127, 0, 38, 108}, {8731, 0, 129, 127, 0, 13, 108},
    {8732, 0, 129, 127, 0, 19, 108}, {8733, 0, 129, 127, 0, 19, 108},
    {8734, 0, 129, 127, 0, 19, 108}, {8735, 0, 129, 127, 0, 19, 108},
    {8736, 0, 129, 127, 0, 19, 102}, {8737, 0, 129, 127, 0, 19, 102},
    {8738, 0, 129, 127, 0, 19, 102}, {8739, 0, 129, 127, 0, 19, 102},
    {8740, 0, 129, 127, 0, 19, 102}, {8741, 0, 129, 127, 0, 19, 102},
    {8742, 0, 129, 127, 0, 19, 102}, {8743, 0, 129, 127, 0, 19, 102},
    {8744, 0, 129, 127, 0, 19, 102}, {8745, 0, 129, 127, 0, 19, 102},
    {8746, 0, 129, 127, 0, 19, 114}, {8747, 0, 129, 127, 0, 19, 114},
    {8748, 0, 129, 127, 0, 19, 102}, {8749, 0, 129, 127, 0, 19, 102},
    {8750, 0, 129, 127, 0, 19, 102}, {8751, 0, 129, 127, 0, 19, 102},
    {8752, 0, 129, 127, 0, 19, 102}, {8753, 0, 129, 127, 0, 19, 102},
    {8754, 0, 129, 127, 0, 19, 102}, {8755, 0, 129, 127, 0, 19, 102},
    {8756, 0, 129, 127, 0, 19, 102}, {8757, 0, 129, 127, 0, 19, 102},
    {8758, 0, 129, 127, 0, 19, 102}, {8759, 0, 129, 127, 0, 19, 102},
    {8760, 0, 129, 127, 0, 19, 102}, {8761, 0, 129, 127, 0, 19, 102},
    {8762, 0, 129, 127, 0, 19, 102}, {8763, 0, 129, 127, 0, 19, 102},
    {8764, 0, 129, 127, 0, 19, 102}, {8765, 0, 129, 127, 0, 19, 102},
    {8766, 0, 129, 127, 0, 19, 102}, {8767, 0, 129, 127, 0, 19, 102},
    {8768, 0, 129, 127, 0, 19, 102}, {8769, 0, 129, 127, 0, 19, 102},
    {8770, 0, 129, 127, 0, 19, 102}, {8771, 0, 129, 127, 0, 19, 102},
    {8772, 0, 129, 127, 0, 19, 102}, {8773, 0, 129, 127, 0, 19, 102},
    {8774, 0, 129, 127, 0, 19, 102}, {8775, 0, 129, 127, 0, 19, 102},
    {8776, 0, 129, 127, 0, 19, 102}, {8777, 0, 129, 127, 0, 19, 102},
    {8778, 0, 129, 127, 0, 19, 102}, {8779, 0, 129, 127, 0, 19, 114},
    {8780, 0, 129, 127, 0, 19, 108}, {8781, 0, 129, 127, 0, 19, 114},
    {8782, 0, 129, 127, 0, 13, 114}, {8783, 0, 129, 127, 0, 19, 108},
    {8784, 0, 129, 127, 0, 13, 114}, {8785, 0, 129, 127, 0, 19, 108},
    {8786, 0, 129, 127, 0, 19, 108}, {8787, 0, 129, 127, 0, 19, 108},
    {8788, 0, 129, 127, 0, 19, 108}, {8789, 0, 129, 127, 0, 19, 108},
    {8790, 0, 129, 127, 0, 19, 108}, {8791, 0, 129, 127, 0, 19, 108},
    {8792, 0, 129, 127, 0, 19, 108}, {8793, 0, 129, 127, 0, 19, 108},
    {8794, 0, 129, 127, 0, 19, 108}, {8795, 0, 129, 127, 0, 19, 108},
    {8796, 0, 129, 127, 0, 19, 108}, {8797, 0, 129, 127, 0, 19, 108},
    {8798, 0, 129, 127, 0, 19, 108}, {8799, 0, 129, 127, 0, 19, 108},
    {8800, 0, 129, 127, 0, 19, 108}, {8801, 0, 129, 127, 0, 19, 108},
    {8802, 0, 129, 127, 0, 19, 108}, {8803, 0, 129, 127, 0, 19, 108},
    {8804, 0, 129, 127, 0, 19, 108}, {8805, 0, 129, 127, 0, 19, 108},
    {8806, 0, 129, 127, 0, 19, 108}, {8807, 0, 129, 127, 0, 19, 108},
    {8808, 0, 129, 127, 0, 19, 108}, {8809, 0, 129, 127, 0, 19, 108},
    {8810, 0, 129, 127, 0, 19, 108}, {8811, 0, 129, 127, 0, 19, 114},
    {8812, 0, 129, 127, 0, 19, 102}, {8813, 0, 129, 127, 0, 19, 114},
    {8814, 0, 129, 127, 0, 76, 102}, {8815, 0, 129, 127, 0, 13, 121},
    {8816, 0, 129, 127, 0, 19, 114}, {8817, 0, 129, 127, 0, 19, 127},
    {8818, 0, 129, 127, 0, 19, 114}, {8819, 0, 129, 127, 0, 218, 108},
};
136 
// Boundary value to avoid integer overflow when adding 1/64th of the value.
// GetCharBBox() only performs `top += top / 64` when `top` is at or below this
// bound (2114445437 + 2114445437 / 64 == 2147483646, which still fits in an
// int); larger values are saturated to INT_MAX instead.
constexpr int kMaxRectTop = 2114445437;
139 
FTPosToCBoxInt(FT_Pos pos)140 int FTPosToCBoxInt(FT_Pos pos) {
141   // Boundary values to avoid integer overflow when multiplied by 1000.
142   constexpr FT_Pos kMinCBox = -2147483;
143   constexpr FT_Pos kMaxCBox = 2147483;
144   return static_cast<int>(pdfium::clamp(pos, kMinCBox, kMaxCBox));
145 }
146 
147 #if !BUILDFLAG(IS_WIN)
148 
IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset)149 bool IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset) {
150   switch (charset) {
151     case CIDSET_GB1:
152     case CIDSET_CNS1:
153     case CIDSET_JAPAN1:
154     case CIDSET_KOREA1:
155       return true;
156 
157     default:
158       return false;
159   }
160 }
161 
EmbeddedUnicodeFromCharcode(const fxcmap::CMap * pEmbedMap,CIDSet charset,uint32_t charcode)162 wchar_t EmbeddedUnicodeFromCharcode(const fxcmap::CMap* pEmbedMap,
163                                     CIDSet charset,
164                                     uint32_t charcode) {
165   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
166     return 0;
167 
168   uint16_t cid = fxcmap::CIDFromCharCode(pEmbedMap, charcode);
169   if (!cid)
170     return 0;
171 
172   pdfium::span<const uint16_t> map =
173       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
174   return cid < map.size() ? map[cid] : 0;
175 }
176 
EmbeddedCharcodeFromUnicode(const fxcmap::CMap * pEmbedMap,CIDSet charset,wchar_t unicode)177 uint32_t EmbeddedCharcodeFromUnicode(const fxcmap::CMap* pEmbedMap,
178                                      CIDSet charset,
179                                      wchar_t unicode) {
180   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
181     return 0;
182 
183   pdfium::span<const uint16_t> map =
184       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
185   for (uint32_t i = 0; i < map.size(); ++i) {
186     if (map[i] == unicode) {
187       uint32_t charCode = fxcmap::CharCodeFromCID(pEmbedMap, i);
188       if (charCode)
189         return charCode;
190     }
191   }
192   return 0;
193 }
194 
195 #endif  // !BUILDFLAG(IS_WIN)
196 
FT_UseCIDCharmap(FXFT_FaceRec * face,CIDCoding coding)197 void FT_UseCIDCharmap(FXFT_FaceRec* face, CIDCoding coding) {
198   int encoding;
199   switch (coding) {
200     case CIDCoding::kGB:
201       encoding = FT_ENCODING_GB2312;
202       break;
203     case CIDCoding::kBIG5:
204       encoding = FT_ENCODING_BIG5;
205       break;
206     case CIDCoding::kJIS:
207       encoding = FT_ENCODING_SJIS;
208       break;
209     case CIDCoding::kKOREA:
210       encoding = FT_ENCODING_JOHAB;
211       break;
212     default:
213       encoding = FT_ENCODING_UNICODE;
214   }
215   int err = FXFT_Select_Charmap(face, encoding);
216   if (err)
217     err = FXFT_Select_Charmap(face, FT_ENCODING_UNICODE);
218   if (err && face->charmaps)
219     FT_Set_Charmap(face, face->charmaps[0]);
220 }
221 
// Returns true when `cid` lies in the inclusive range
// [pEntry[0], pEntry[1]]. pEntry[1] is only read when pEntry[0] <= cid.
bool IsMetricForCID(const int* pEntry, uint16_t cid) {
  const int value = cid;
  return !(value < pEntry[0] || value > pEntry[1]);
}
225 
LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,std::vector<int> * result,int nElements)226 void LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,
227                       std::vector<int>* result,
228                       int nElements) {
229   int width_status = 0;
230   int iCurElement = 0;
231   int first_code = 0;
232   int last_code = 0;
233   for (size_t i = 0; i < pArray->size(); i++) {
234     RetainPtr<const CPDF_Object> pObj = pArray->GetDirectObjectAt(i);
235     if (!pObj)
236       continue;
237 
238     const CPDF_Array* pObjArray = pObj->AsArray();
239     if (pObjArray) {
240       if (width_status != 1)
241         return;
242       if (first_code > std::numeric_limits<int>::max() -
243                            fxcrt::CollectionSize<int>(*pObjArray)) {
244         width_status = 0;
245         continue;
246       }
247 
248       for (size_t j = 0; j < pObjArray->size(); j += nElements) {
249         result->push_back(first_code);
250         result->push_back(first_code);
251         for (int k = 0; k < nElements; k++)
252           result->push_back(pObjArray->GetIntegerAt(j + k));
253         first_code++;
254       }
255       width_status = 0;
256     } else {
257       if (width_status == 0) {
258         first_code = pObj->GetInteger();
259         width_status = 1;
260       } else if (width_status == 1) {
261         last_code = pObj->GetInteger();
262         width_status = 2;
263         iCurElement = 0;
264       } else {
265         if (!iCurElement) {
266           result->push_back(first_code);
267           result->push_back(last_code);
268         }
269         result->push_back(pObj->GetInteger());
270         iCurElement++;
271         if (iCurElement == nElements)
272           width_status = 0;
273       }
274     }
275   }
276 }
277 
278 }  // namespace
279 
CPDF_CIDFont(CPDF_Document * pDocument,RetainPtr<CPDF_Dictionary> pFontDict)280 CPDF_CIDFont::CPDF_CIDFont(CPDF_Document* pDocument,
281                            RetainPtr<CPDF_Dictionary> pFontDict)
282     : CPDF_Font(pDocument, std::move(pFontDict)) {
283   for (size_t i = 0; i < std::size(m_CharBBox); ++i)
284     m_CharBBox[i] = FX_RECT(-1, -1, -1, -1);
285 }
286 
// Defaulted destructor; no explicit cleanup is written in this file.
CPDF_CIDFont::~CPDF_CIDFont() = default;
288 
// Type query: this class always reports itself as a CID font.
bool CPDF_CIDFont::IsCIDFont() const {
  return true;
}
292 
// Const downcast helper: returns this object, never null.
const CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() const {
  return this;
}
296 
// Mutable downcast helper: returns this object, never null.
CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() {
  return this;
}
300 
CIDFromCharCode(uint32_t charcode) const301 uint16_t CPDF_CIDFont::CIDFromCharCode(uint32_t charcode) const {
302   return m_pCMap ? m_pCMap->CIDFromCharCode(charcode)
303                  : static_cast<uint16_t>(charcode);
304 }
305 
IsVertWriting() const306 bool CPDF_CIDFont::IsVertWriting() const {
307   return m_pCMap && m_pCMap->IsVertWriting();
308 }
309 
UnicodeFromCharCode(uint32_t charcode) const310 WideString CPDF_CIDFont::UnicodeFromCharCode(uint32_t charcode) const {
311   WideString str = CPDF_Font::UnicodeFromCharCode(charcode);
312   if (!str.IsEmpty())
313     return str;
314   wchar_t ret = GetUnicodeFromCharCode(charcode);
315   return ret ? WideString(ret) : WideString();
316 }
317 
GetUnicodeFromCharCode(uint32_t charcode) const318 wchar_t CPDF_CIDFont::GetUnicodeFromCharCode(uint32_t charcode) const {
319   switch (m_pCMap->GetCoding()) {
320     case CIDCoding::kUCS2:
321     case CIDCoding::kUTF16:
322       return static_cast<wchar_t>(charcode);
323     case CIDCoding::kCID:
324       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
325         return 0;
326       return m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
327     default:
328       break;
329   }
330   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
331     return m_pCID2UnicodeMap->UnicodeFromCID(CIDFromCharCode(charcode));
332 
333 #if BUILDFLAG(IS_WIN)
334   wchar_t unicode;
335   int charsize = 1;
336   if (charcode > 255) {
337     charcode = (charcode % 256) * 256 + (charcode / 256);
338     charsize = 2;
339   }
340   size_t ret = FX_MultiByteToWideChar(
341       kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
342       ByteStringView(reinterpret_cast<const char*>(&charcode), charsize),
343       pdfium::make_span(&unicode, 1));
344   return ret == 1 ? unicode : 0;
345 #else
346   if (!m_pCMap->GetEmbedMap())
347     return 0;
348   return EmbeddedUnicodeFromCharcode(m_pCMap->GetEmbedMap(),
349                                      m_pCMap->GetCharset(), charcode);
350 #endif
351 }
352 
CharCodeFromUnicode(wchar_t unicode) const353 uint32_t CPDF_CIDFont::CharCodeFromUnicode(wchar_t unicode) const {
354   uint32_t charcode = CPDF_Font::CharCodeFromUnicode(unicode);
355   if (charcode)
356     return charcode;
357 
358   switch (m_pCMap->GetCoding()) {
359     case CIDCoding::kUNKNOWN:
360       return 0;
361     case CIDCoding::kUCS2:
362     case CIDCoding::kUTF16:
363       return unicode;
364     case CIDCoding::kCID: {
365       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
366         return 0;
367       uint32_t cid = 0;
368       while (cid < 65536) {
369         wchar_t this_unicode =
370             m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(cid));
371         if (this_unicode == unicode)
372           return cid;
373         cid++;
374       }
375       break;
376     }
377     default:
378       break;
379   }
380 
381   if (unicode < 0x80)
382     return static_cast<uint32_t>(unicode);
383   if (m_pCMap->GetCoding() == CIDCoding::kCID)
384     return 0;
385 #if BUILDFLAG(IS_WIN)
386   uint8_t buffer[32];
387   size_t ret = FX_WideCharToMultiByte(
388       kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
389       WideStringView(&unicode, 1),
390       pdfium::make_span(reinterpret_cast<char*>(buffer), 4));
391   if (ret == 1)
392     return buffer[0];
393   if (ret == 2)
394     return buffer[0] * 256 + buffer[1];
395 #else
396   if (m_pCMap->GetEmbedMap()) {
397     return EmbeddedCharcodeFromUnicode(m_pCMap->GetEmbedMap(),
398                                        m_pCMap->GetCharset(), unicode);
399   }
400 #endif
401   return 0;
402 }
403 
// Initializes the font from its dictionary. Returns false when the dictionary
// lacks a usable single-entry "DescendantFonts" array or a name/stream
// "Encoding"; returns true otherwise.
//
// A font whose "Subtype" is "TrueType" is handled separately by LoadGB2312().
// For all other fonts this reads, in order: the base font name, the CMap
// (embedded stream or predefined name), the font descriptor, the charset and
// CID-to-Unicode map, horizontal widths ("DW"/"W"), the substitute font when
// nothing is embedded, "CIDToGIDMap", and finally the vertical metrics
// ("W2"/"DW2") when the CMap is vertical.
bool CPDF_CIDFont::Load() {
  if (m_pFontDict->GetByteStringFor("Subtype") == "TrueType") {
    LoadGB2312();
    return true;
  }

  // Exactly one descendant font dictionary is required.
  RetainPtr<const CPDF_Array> pFonts =
      m_pFontDict->GetArrayFor("DescendantFonts");
  if (!pFonts || pFonts->size() != 1)
    return false;

  RetainPtr<const CPDF_Dictionary> pCIDFontDict = pFonts->GetDictAt(0);
  if (!pCIDFontDict)
    return false;

  // Non-embedded CourierStd variants get special glyph lookup treatment in
  // GlyphFromCharCode().
  m_BaseFontName = pCIDFontDict->GetByteStringFor("BaseFont");
  if ((m_BaseFontName == "CourierStd" || m_BaseFontName == "CourierStd-Bold" ||
       m_BaseFontName == "CourierStd-BoldOblique" ||
       m_BaseFontName == "CourierStd-Oblique") &&
      !IsEmbedded()) {
    m_bAdobeCourierStd = true;
  }

  RetainPtr<const CPDF_Object> pEncoding =
      m_pFontDict->GetDirectObjectFor("Encoding");
  if (!pEncoding)
    return false;

  // Anything other than "CIDFontType0" is treated as TrueType.
  ByteString subtype = pCIDFontDict->GetByteStringFor("Subtype");
  m_FontType =
      subtype == "CIDFontType0" ? CIDFontType::kType1 : CIDFontType::kTrueType;

  if (!pEncoding->IsName() && !pEncoding->IsStream())
    return false;

  // The CMap is either parsed from the embedded stream's filtered data or
  // fetched by name from the global font data.
  auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
  const CPDF_Stream* pEncodingStream = pEncoding->AsStream();
  if (pEncodingStream) {
    auto pAcc =
        pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pEncodingStream));
    pAcc->LoadAllDataFiltered();
    pdfium::span<const uint8_t> span = pAcc->GetSpan();
    m_pCMap = pdfium::MakeRetain<CPDF_CMap>(span);
  } else {
    DCHECK(pEncoding->IsName());
    ByteString cmap = pEncoding->GetString();
    m_pCMap = pFontGlobals->GetPredefinedCMap(cmap);
  }

  RetainPtr<const CPDF_Dictionary> pFontDesc =
      pCIDFontDict->GetDictFor("FontDescriptor");
  if (pFontDesc)
    LoadFontDescriptor(pFontDesc.Get());

  // Prefer the CMap's charset; fall back to CIDSystemInfo's "Ordering".
  // NOTE(review): m_pCMap is dereferenced without a null check here, which
  // presumes GetPredefinedCMap() never returns null - confirm.
  m_Charset = m_pCMap->GetCharset();
  if (m_Charset == CIDSET_UNKNOWN) {
    RetainPtr<const CPDF_Dictionary> pCIDInfo =
        pCIDFontDict->GetDictFor("CIDSystemInfo");
    if (pCIDInfo) {
      m_Charset = CPDF_CMapParser::CharsetFromOrdering(
          pCIDInfo->GetByteStringFor("Ordering").AsStringView());
    }
  }
  if (m_Charset != CIDSET_UNKNOWN) {
    m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
  }
  // Pick a charmap on the already-loaded face, if there is one.
  if (m_Font.GetFaceRec()) {
    if (m_FontType == CIDFontType::kType1)
      FXFT_Select_Charmap(m_Font.GetFaceRec(), FT_ENCODING_UNICODE);
    else
      FT_UseCIDCharmap(m_Font.GetFaceRec(), m_pCMap->GetCoding());
  }
  // Horizontal widths: default of 1000 plus per-CID overrides, stored as
  // (first, last, width) triples.
  m_DefaultWidth = pCIDFontDict->GetIntegerFor("DW", 1000);
  RetainPtr<const CPDF_Array> pWidthArray = pCIDFontDict->GetArrayFor("W");
  if (pWidthArray)
    LoadMetricsArray(std::move(pWidthArray), &m_WidthList, 1);

  if (!IsEmbedded())
    LoadSubstFont();

  // "CIDToGIDMap" is either a stream (kept for lookups in
  // GlyphFromCharCode()) or the name "Identity", which is honored only when a
  // font file is present.
  RetainPtr<const CPDF_Object> pmap =
      pCIDFontDict->GetDirectObjectFor("CIDToGIDMap");
  if (pmap) {
    RetainPtr<const CPDF_Stream> pMapStream(pmap->AsStream());
    if (pMapStream) {
      m_pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pMapStream));
      m_pStreamAcc->LoadAllDataFiltered();
    } else if (m_pFontFile && pmap->IsName() &&
               pmap->GetString() == "Identity") {
      m_bCIDIsGID = true;
    }
  }

  CheckFontMetrics();
  // Vertical metrics are stored as (first, last, w1, vx, vy) 5-tuples.
  if (IsVertWriting()) {
    RetainPtr<const CPDF_Array> pWidth2Array = pCIDFontDict->GetArrayFor("W2");
    if (pWidth2Array)
      LoadMetricsArray(std::move(pWidth2Array), &m_VertMetrics, 3);

    RetainPtr<const CPDF_Array> pDefaultArray =
        pCIDFontDict->GetArrayFor("DW2");
    if (pDefaultArray) {
      m_DefaultVY = pDefaultArray->GetIntegerAt(0);
      m_DefaultW1 = pDefaultArray->GetIntegerAt(1);
    }
  }

  // TODO(thestig): Better identify font types and identify more font types.
  if (m_FontType == CIDFontType::kTrueType && IsEmbedded())
    m_Font.SetFontType(CFX_Font::FontType::kCIDTrueType);

  return true;
}
517 
GetCharBBox(uint32_t charcode)518 FX_RECT CPDF_CIDFont::GetCharBBox(uint32_t charcode) {
519   if (charcode < 256 && m_CharBBox[charcode].right != -1)
520     return m_CharBBox[charcode];
521 
522   FX_RECT rect;
523   bool bVert = false;
524   int glyph_index = GlyphFromCharCode(charcode, &bVert);
525   FXFT_FaceRec* face = m_Font.GetFaceRec();
526   if (face) {
527     if (FXFT_Is_Face_Tricky(face)) {
528       int err =
529           FT_Load_Glyph(face, glyph_index, FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH);
530       if (!err) {
531         FT_Glyph glyph;
532         err = FT_Get_Glyph(face->glyph, &glyph);
533         if (!err) {
534           FT_BBox cbox;
535           FT_Glyph_Get_CBox(glyph, FT_GLYPH_BBOX_PIXELS, &cbox);
536           const int xMin = FTPosToCBoxInt(cbox.xMin);
537           const int xMax = FTPosToCBoxInt(cbox.xMax);
538           const int yMin = FTPosToCBoxInt(cbox.yMin);
539           const int yMax = FTPosToCBoxInt(cbox.yMax);
540           const int pixel_size_x = face->size->metrics.x_ppem;
541           const int pixel_size_y = face->size->metrics.y_ppem;
542           if (pixel_size_x == 0 || pixel_size_y == 0) {
543             rect = FX_RECT(xMin, yMax, xMax, yMin);
544           } else {
545             rect =
546                 FX_RECT(xMin * 1000 / pixel_size_x, yMax * 1000 / pixel_size_y,
547                         xMax * 1000 / pixel_size_x, yMin * 1000 / pixel_size_y);
548           }
549           rect.top = std::min(rect.top,
550                               static_cast<int>(FXFT_Get_Face_Ascender(face)));
551           rect.bottom = std::max(
552               rect.bottom, static_cast<int>(FXFT_Get_Face_Descender(face)));
553           FT_Done_Glyph(glyph);
554         }
555       }
556     } else {
557       int err = FT_Load_Glyph(face, glyph_index, FT_LOAD_NO_SCALE);
558       if (err == 0) {
559         rect = FX_RECT(TT2PDF(FXFT_Get_Glyph_HoriBearingX(face), face),
560                        TT2PDF(FXFT_Get_Glyph_HoriBearingY(face), face),
561                        TT2PDF(FXFT_Get_Glyph_HoriBearingX(face) +
562                                   FXFT_Get_Glyph_Width(face),
563                               face),
564                        TT2PDF(FXFT_Get_Glyph_HoriBearingY(face) -
565                                   FXFT_Get_Glyph_Height(face),
566                               face));
567         if (rect.top <= kMaxRectTop)
568           rect.top += rect.top / 64;
569         else
570           rect.top = std::numeric_limits<int>::max();
571       }
572     }
573   }
574   if (!m_pFontFile && m_Charset == CIDSET_JAPAN1) {
575     uint16_t cid = CIDFromCharCode(charcode);
576     const uint8_t* pTransform = GetCIDTransform(cid);
577     if (pTransform && !bVert) {
578       CFX_Matrix matrix(CIDTransformToFloat(pTransform[0]),
579                         CIDTransformToFloat(pTransform[1]),
580                         CIDTransformToFloat(pTransform[2]),
581                         CIDTransformToFloat(pTransform[3]),
582                         CIDTransformToFloat(pTransform[4]) * 1000,
583                         CIDTransformToFloat(pTransform[5]) * 1000);
584       rect = matrix.TransformRect(CFX_FloatRect(rect)).GetOuterRect();
585     }
586   }
587   if (charcode < 256)
588     m_CharBBox[charcode] = rect;
589 
590   return rect;
591 }
592 
GetCharWidthF(uint32_t charcode)593 int CPDF_CIDFont::GetCharWidthF(uint32_t charcode) {
594   if (charcode < 0x80 && m_bAnsiWidthsFixed)
595     return (charcode >= 32 && charcode < 127) ? 500 : 0;
596 
597   uint16_t cid = CIDFromCharCode(charcode);
598   size_t size = m_WidthList.size();
599   const int* pList = m_WidthList.data();
600   for (size_t i = 0; i < size; i += 3) {
601     const int* pEntry = pList + i;
602     if (IsMetricForCID(pEntry, cid))
603       return pEntry[2];
604   }
605   return m_DefaultWidth;
606 }
607 
GetVertWidth(uint16_t cid) const608 int16_t CPDF_CIDFont::GetVertWidth(uint16_t cid) const {
609   size_t vertsize = m_VertMetrics.size() / 5;
610   if (vertsize) {
611     const int* pTable = m_VertMetrics.data();
612     for (size_t i = 0; i < vertsize; i++) {
613       const int* pEntry = pTable + (i * 5);
614       if (IsMetricForCID(pEntry, cid))
615         return static_cast<int16_t>(pEntry[2]);
616     }
617   }
618   return m_DefaultW1;
619 }
620 
GetVertOrigin(uint16_t cid) const621 CFX_Point16 CPDF_CIDFont::GetVertOrigin(uint16_t cid) const {
622   size_t vertsize = m_VertMetrics.size() / 5;
623   if (vertsize) {
624     const int* pTable = m_VertMetrics.data();
625     for (size_t i = 0; i < vertsize; i++) {
626       const int* pEntry = pTable + (i * 5);
627       if (IsMetricForCID(pEntry, cid)) {
628         return {static_cast<int16_t>(pEntry[3]),
629                 static_cast<int16_t>(pEntry[4])};
630       }
631     }
632   }
633   int width = m_DefaultWidth;
634   size_t size = m_WidthList.size();
635   const int* pList = m_WidthList.data();
636   for (size_t i = 0; i < size; i += 3) {
637     const int* pEntry = pList + i;
638     if (IsMetricForCID(pEntry, cid)) {
639       width = pEntry[2];
640       break;
641     }
642   }
643   return {static_cast<int16_t>(width / 2), m_DefaultVY};
644 }
645 
GetGlyphIndex(uint32_t unicode,bool * pVertGlyph)646 int CPDF_CIDFont::GetGlyphIndex(uint32_t unicode, bool* pVertGlyph) {
647   if (pVertGlyph)
648     *pVertGlyph = false;
649 
650   FXFT_FaceRec* face = m_Font.GetFaceRec();
651   int index = FT_Get_Char_Index(face, unicode);
652   if (unicode == pdfium::unicode::kBoxDrawingsLightVerical)
653     return index;
654 
655   if (!index || !IsVertWriting())
656     return index;
657 
658   if (m_pTTGSUBTable)
659     return GetVerticalGlyph(index, pVertGlyph);
660 
661   static constexpr uint32_t kGsubTag =
662       CFX_FontMapper::MakeTag('G', 'S', 'U', 'B');
663   if (!m_Font.GetSubData()) {
664     unsigned long length = 0;
665     int error = FT_Load_Sfnt_Table(face, kGsubTag, 0, nullptr, &length);
666     if (!error)
667       m_Font.AllocSubData(length);
668   }
669   int error =
670       FT_Load_Sfnt_Table(face, kGsubTag, 0, m_Font.GetSubData(), nullptr);
671   if (error || !m_Font.GetSubData())
672     return index;
673 
674   m_pTTGSUBTable = std::make_unique<CFX_CTTGSUBTable>(m_Font.GetSubData());
675   return GetVerticalGlyph(index, pVertGlyph);
676 }
677 
GetVerticalGlyph(int index,bool * pVertGlyph)678 int CPDF_CIDFont::GetVerticalGlyph(int index, bool* pVertGlyph) {
679   uint32_t vindex = m_pTTGSUBTable->GetVerticalGlyph(index);
680   if (!vindex)
681     return index;
682 
683   index = vindex;
684   if (pVertGlyph)
685     *pVertGlyph = true;
686   return index;
687 }
688 
// Maps `charcode` to a glyph index, or -1 when no glyph can be determined.
// `*pVertGlyph` (if non-null) is reset to false and is set to true only when
// GetGlyphIndex() substitutes a vertical glyph.
//
// Two main paths:
// 1. No embedded font file, and either no CIDToGIDMap stream or a
//    CID-to-Unicode map is available: go through Unicode and the substitute
//    face's charmap.
// 2. Otherwise: use the CID directly, the face's charmap, or the CIDToGIDMap
//    stream (two big-endian bytes per CID).
int CPDF_CIDFont::GlyphFromCharCode(uint32_t charcode, bool* pVertGlyph) {
  if (pVertGlyph)
    *pVertGlyph = false;

  if (!m_pFontFile && (!m_pStreamAcc || m_pCID2UnicodeMap)) {
    uint16_t cid = CIDFromCharCode(charcode);
    wchar_t unicode = 0;
    if (m_bCIDIsGID) {
#if BUILDFLAG(IS_APPLE)
      // On Apple platforms, non-symbolic fonts still go through Unicode when
      // a Unicode value is available; everything else uses the CID as GID.
      if (FontStyleIsSymbolic(m_Flags))
        return cid;

      WideString uni_str = UnicodeFromCharCode(charcode);
      if (uni_str.IsEmpty())
        return cid;

      unicode = uni_str[0];
#else
      return cid;
#endif
    } else {
      // Try progressively broader sources for a Unicode value.
      if (cid && m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded())
        unicode = m_pCID2UnicodeMap->UnicodeFromCID(cid);
      if (unicode == 0)
        unicode = GetUnicodeFromCharCode(charcode);
      if (unicode == 0) {
        WideString unicode_str = UnicodeFromCharCode(charcode);
        if (!unicode_str.IsEmpty())
          unicode = unicode_str[0];
      }
    }
    FXFT_FaceRec* face = m_Font.GetFaceRec();
    if (unicode == 0) {
      // No Unicode value: fall back to the charcode itself (-1 for 0), unless
      // this is a non-embedded CourierStd font (see Load()).
      if (!m_bAdobeCourierStd)
        return charcode ? static_cast<int>(charcode) : -1;

      // NOTE(review): the rationale for the +31 offset is not visible in this
      // file. Also, `face` is used here before the null check further down -
      // confirm it cannot be null on this path.
      charcode += 31;
      bool bMSUnicode = UseTTCharmapMSUnicode(face);
      bool bMacRoman = !bMSUnicode && UseTTCharmapMacRoman(face);
      FontEncoding base_encoding = FontEncoding::kStandard;
      if (bMSUnicode)
        base_encoding = FontEncoding::kWinAnsi;
      else if (bMacRoman)
        base_encoding = FontEncoding::kMacRoman;
      const char* name =
          GetAdobeCharName(base_encoding, std::vector<ByteString>(), charcode);
      if (!name)
        return charcode ? static_cast<int>(charcode) : -1;

      int index = 0;
      uint16_t name_unicode = UnicodeFromAdobeName(name);
      if (!name_unicode)
        return charcode ? static_cast<int>(charcode) : -1;

      if (base_encoding == FontEncoding::kStandard)
        return FT_Get_Char_Index(face, name_unicode);

      if (base_encoding == FontEncoding::kWinAnsi) {
        index = FT_Get_Char_Index(face, name_unicode);
      } else {
        DCHECK_EQ(base_encoding, FontEncoding::kMacRoman);
        // Prefer the Mac Roman code; fall back to a lookup by glyph name.
        uint32_t maccode = CharCodeFromUnicodeForFreetypeEncoding(
            FT_ENCODING_APPLE_ROMAN, name_unicode);
        index = maccode ? FT_Get_Char_Index(face, maccode)
                        : FT_Get_Name_Index(face, name);
      }
      if (index == 0 || index == 0xffff)
        return charcode ? static_cast<int>(charcode) : -1;
      return index;
    }
    if (m_Charset == CIDSET_JAPAN1) {
      // Japan1-specific remapping: backslash becomes slash, and (except on
      // Apple platforms) U+00A5 becomes U+005C.
      if (unicode == '\\') {
        unicode = '/';
#if !BUILDFLAG(IS_APPLE)
      } else if (unicode == 0xa5) {
        unicode = 0x5c;
#endif
      }
    }
    if (!face)
      return unicode;

    int err = FXFT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (err) {
      // No Unicode charmap: use the first charmap whose encoding can
      // represent `charcode`; failing that, the face's first charmap with the
      // raw charcode.
      int i;
      for (i = 0; i < face->num_charmaps; i++) {
        uint32_t ret = CharCodeFromUnicodeForFreetypeEncoding(
            FXFT_Get_Charmap_Encoding(face->charmaps[i]),
            static_cast<wchar_t>(charcode));
        if (ret == 0)
          continue;
        FT_Set_Charmap(face, face->charmaps[i]);
        unicode = static_cast<wchar_t>(ret);
        break;
      }
      if (i == face->num_charmaps && i) {
        FT_Set_Charmap(face, face->charmaps[0]);
        unicode = static_cast<wchar_t>(charcode);
      }
    }
    if (face->charmap) {
      int index = GetGlyphIndex(unicode, pVertGlyph);
      return index != 0 ? index : -1;
    }
    return unicode;
  }

  if (!m_Font.GetFaceRec())
    return -1;

  uint16_t cid = CIDFromCharCode(charcode);
  if (!m_pStreamAcc) {
    // No CIDToGIDMap stream: in several cases the CID is used directly as
    // the glyph index.
    if (m_FontType == CIDFontType::kType1)
      return cid;
    if (m_pFontFile && m_pCMap->IsDirectCharcodeToCIDTableIsEmpty())
      return cid;

    FT_CharMap charmap = m_Font.GetFaceRec()->charmap;
    if (!charmap || m_pCMap->GetCoding() == CIDCoding::kUNKNOWN)
      return cid;

    // A Unicode charmap needs a Unicode value rather than the raw charcode.
    if (FXFT_Get_Charmap_Encoding(charmap) == FT_ENCODING_UNICODE) {
      WideString unicode_str = UnicodeFromCharCode(charcode);
      if (unicode_str.IsEmpty())
        return -1;

      charcode = unicode_str[0];
    }
    return GetGlyphIndex(charcode, pVertGlyph);
  }
  // CIDToGIDMap stream: two bytes per CID, high byte first. `cid` is 16 bits
  // wide, so `cid * 2 + 2` cannot overflow a uint32_t.
  uint32_t byte_pos = cid * 2;
  if (byte_pos + 2 > m_pStreamAcc->GetSize())
    return -1;

  pdfium::span<const uint8_t> span = m_pStreamAcc->GetSpan().subspan(byte_pos);
  return span[0] * 256 + span[1];
}
826 
// Forwards to the CMap's GetNextChar() with the same string and offset
// pointer. m_pCMap is dereferenced without a null check.
uint32_t CPDF_CIDFont::GetNextChar(ByteStringView pString,
                                   size_t* pOffset) const {
  return m_pCMap->GetNextChar(pString, pOffset);
}
831 
// Forwards to the CMap's GetCharSize() for `charcode`. m_pCMap is
// dereferenced without a null check.
int CPDF_CIDFont::GetCharSize(uint32_t charcode) const {
  return m_pCMap->GetCharSize(charcode);
}
835 
// Forwards to the CMap's CountChar() for `pString`. m_pCMap is dereferenced
// without a null check.
size_t CPDF_CIDFont::CountChar(ByteStringView pString) const {
  return m_pCMap->CountChar(pString);
}
839 
// Forwards to the CMap's AppendChar() with the same buffer and charcode, and
// returns its result. m_pCMap is dereferenced without a null check.
int CPDF_CIDFont::AppendChar(char* str, uint32_t charcode) const {
  return m_pCMap->AppendChar(str, charcode);
}
843 
IsUnicodeCompatible() const844 bool CPDF_CIDFont::IsUnicodeCompatible() const {
845   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
846     return true;
847   return m_pCMap->GetCoding() != CIDCoding::kUNKNOWN;
848 }
849 
LoadSubstFont()850 void CPDF_CIDFont::LoadSubstFont() {
851   FX_SAFE_INT32 safeStemV(m_StemV);
852   safeStemV *= 5;
853   m_Font.LoadSubst(m_BaseFontName, m_FontType == CIDFontType::kTrueType,
854                    m_Flags, safeStemV.ValueOrDefault(FXFONT_FW_NORMAL),
855                    m_ItalicAngle, kCharsetCodePages[m_Charset],
856                    IsVertWriting());
857 }
858 
859 // static
CIDTransformToFloat(uint8_t ch)860 float CPDF_CIDFont::CIDTransformToFloat(uint8_t ch) {
861   return (ch < 128 ? ch : ch - 255) * (1.0f / 127);
862 }
863 
LoadGB2312()864 void CPDF_CIDFont::LoadGB2312() {
865   m_BaseFontName = m_pFontDict->GetByteStringFor("BaseFont");
866   m_Charset = CIDSET_GB1;
867 
868   auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
869   m_pCMap = pFontGlobals->GetPredefinedCMap("GBK-EUC-H");
870   m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
871   RetainPtr<const CPDF_Dictionary> pFontDesc =
872       m_pFontDict->GetDictFor("FontDescriptor");
873   if (pFontDesc)
874     LoadFontDescriptor(pFontDesc.Get());
875 
876   if (!IsEmbedded())
877     LoadSubstFont();
878   CheckFontMetrics();
879   m_bAnsiWidthsFixed = true;
880 }
881 
GetCIDTransform(uint16_t cid) const882 const uint8_t* CPDF_CIDFont::GetCIDTransform(uint16_t cid) const {
883   if (m_Charset != CIDSET_JAPAN1 || m_pFontFile)
884     return nullptr;
885 
886   const auto* pEnd = kJapan1VerticalCIDs + std::size(kJapan1VerticalCIDs);
887   const auto* pTransform = std::lower_bound(
888       kJapan1VerticalCIDs, pEnd, cid,
889       [](const CIDTransform& entry, uint16_t cid) { return entry.cid < cid; });
890   return (pTransform < pEnd && cid == pTransform->cid) ? &pTransform->a
891                                                        : nullptr;
892 }
893