• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cidfont.h"
8 
9 #include <algorithm>
10 #include <array>
11 #include <limits>
12 #include <utility>
13 #include <vector>
14 
15 #include "build/build_config.h"
16 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
17 #include "core/fpdfapi/font/cfx_cttgsubtable.h"
18 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
19 #include "core/fpdfapi/font/cpdf_cmap.h"
20 #include "core/fpdfapi/font/cpdf_cmapparser.h"
21 #include "core/fpdfapi/font/cpdf_fontencoding.h"
22 #include "core/fpdfapi/font/cpdf_fontglobals.h"
23 #include "core/fpdfapi/parser/cpdf_array.h"
24 #include "core/fpdfapi/parser/cpdf_dictionary.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
27 #include "core/fxcrt/check.h"
28 #include "core/fxcrt/check_op.h"
29 #include "core/fxcrt/fixed_size_data_vector.h"
30 #include "core/fxcrt/fx_codepage.h"
31 #include "core/fxcrt/fx_memory.h"
32 #include "core/fxcrt/fx_safe_types.h"
33 #include "core/fxcrt/fx_unicode.h"
34 #include "core/fxcrt/span.h"
35 #include "core/fxcrt/span_util.h"
36 #include "core/fxcrt/stl_util.h"
37 #include "core/fxge/fx_font.h"
38 
39 namespace {
40 
// One metrics record as stored in the flat int vectors that
// LoadMetricsArray() fills: an inclusive CID range followed by the value for
// that range. The member order must not change, because those vectors are
// viewed as arrays of this struct through fxcrt::reinterpret_span.
struct LowHighVal {
  int low;   // First CID of the range (inclusive).
  int high;  // Last CID of the range (inclusive).
  int val;   // Value returned for any CID inside [low, high].
};
46 
47 struct LowHighValXY : LowHighVal {
48   int x;
49   int y;
50 };
51 
IsMetricForCID(const LowHighVal & val,uint16_t cid)52 bool IsMetricForCID(const LowHighVal& val, uint16_t cid) {
53   return val.low <= cid && cid <= val.high;
54 }
55 
56 constexpr std::array<FX_CodePage, CIDSET_NUM_SETS> kCharsetCodePages = {
57     FX_CodePage::kDefANSI,
58     FX_CodePage::kChineseSimplified,
59     FX_CodePage::kChineseTraditional,
60     FX_CodePage::kShiftJIS,
61     FX_CodePage::kHangul,
62     FX_CodePage::kUTF16LE,
63 };
64 
// Per-CID transform entries consulted by CPDF_CIDFont::GetCIDTransform(),
// which only uses them for CIDSET_JAPAN1 fonts that have no embedded font
// file. GetCIDTransform() searches this table with std::lower_bound on the
// `cid` member, so the entries must stay sorted by ascending CID. The
// remaining members (a..f) are decoded with CIDTransformToFloat() in
// GetCharBBox() to build a CFX_Matrix.
// NOTE(review): assumes the first initializer of each entry is `cid` and the
// following six are a, b, c, d, e, f in that order; confirm against the
// CIDTransform declaration.
65 constexpr CIDTransform kJapan1VerticalCIDs[] = {
66     {97, 129, 0, 0, 127, 55, 0},     {7887, 127, 0, 0, 127, 76, 89},
67     {7888, 127, 0, 0, 127, 79, 94},  {7889, 0, 129, 127, 0, 17, 127},
68     {7890, 0, 129, 127, 0, 17, 127}, {7891, 0, 129, 127, 0, 17, 127},
69     {7892, 0, 129, 127, 0, 17, 127}, {7893, 0, 129, 127, 0, 17, 127},
70     {7894, 0, 129, 127, 0, 17, 127}, {7895, 0, 129, 127, 0, 17, 127},
71     {7896, 0, 129, 127, 0, 17, 127}, {7897, 0, 129, 127, 0, 17, 127},
72     {7898, 0, 129, 127, 0, 17, 127}, {7899, 0, 129, 127, 0, 17, 104},
73     {7900, 0, 129, 127, 0, 17, 127}, {7901, 0, 129, 127, 0, 17, 104},
74     {7902, 0, 129, 127, 0, 17, 127}, {7903, 0, 129, 127, 0, 17, 127},
75     {7904, 0, 129, 127, 0, 17, 127}, {7905, 0, 129, 127, 0, 17, 114},
76     {7906, 0, 129, 127, 0, 17, 127}, {7907, 0, 129, 127, 0, 17, 127},
77     {7908, 0, 129, 127, 0, 17, 127}, {7909, 0, 129, 127, 0, 17, 127},
78     {7910, 0, 129, 127, 0, 17, 127}, {7911, 0, 129, 127, 0, 17, 127},
79     {7912, 0, 129, 127, 0, 17, 127}, {7913, 0, 129, 127, 0, 17, 127},
80     {7914, 0, 129, 127, 0, 17, 127}, {7915, 0, 129, 127, 0, 17, 114},
81     {7916, 0, 129, 127, 0, 17, 127}, {7917, 0, 129, 127, 0, 17, 127},
82     {7918, 127, 0, 0, 127, 18, 25},  {7919, 127, 0, 0, 127, 18, 25},
83     {7920, 127, 0, 0, 127, 18, 25},  {7921, 127, 0, 0, 127, 18, 25},
84     {7922, 127, 0, 0, 127, 18, 25},  {7923, 127, 0, 0, 127, 18, 25},
85     {7924, 127, 0, 0, 127, 18, 25},  {7925, 127, 0, 0, 127, 18, 25},
86     {7926, 127, 0, 0, 127, 18, 25},  {7927, 127, 0, 0, 127, 18, 25},
87     {7928, 127, 0, 0, 127, 18, 25},  {7929, 127, 0, 0, 127, 18, 25},
88     {7930, 127, 0, 0, 127, 18, 25},  {7931, 127, 0, 0, 127, 18, 25},
89     {7932, 127, 0, 0, 127, 18, 25},  {7933, 127, 0, 0, 127, 18, 25},
90     {7934, 127, 0, 0, 127, 18, 25},  {7935, 127, 0, 0, 127, 18, 25},
91     {7936, 127, 0, 0, 127, 18, 25},  {7937, 127, 0, 0, 127, 18, 25},
92     {7938, 127, 0, 0, 127, 18, 25},  {7939, 127, 0, 0, 127, 18, 25},
93     {8720, 0, 129, 127, 0, 19, 102}, {8721, 0, 129, 127, 0, 13, 127},
94     {8722, 0, 129, 127, 0, 19, 108}, {8723, 0, 129, 127, 0, 19, 102},
95     {8724, 0, 129, 127, 0, 19, 102}, {8725, 0, 129, 127, 0, 19, 102},
96     {8726, 0, 129, 127, 0, 19, 102}, {8727, 0, 129, 127, 0, 19, 102},
97     {8728, 0, 129, 127, 0, 19, 114}, {8729, 0, 129, 127, 0, 19, 114},
98     {8730, 0, 129, 127, 0, 38, 108}, {8731, 0, 129, 127, 0, 13, 108},
99     {8732, 0, 129, 127, 0, 19, 108}, {8733, 0, 129, 127, 0, 19, 108},
100     {8734, 0, 129, 127, 0, 19, 108}, {8735, 0, 129, 127, 0, 19, 108},
101     {8736, 0, 129, 127, 0, 19, 102}, {8737, 0, 129, 127, 0, 19, 102},
102     {8738, 0, 129, 127, 0, 19, 102}, {8739, 0, 129, 127, 0, 19, 102},
103     {8740, 0, 129, 127, 0, 19, 102}, {8741, 0, 129, 127, 0, 19, 102},
104     {8742, 0, 129, 127, 0, 19, 102}, {8743, 0, 129, 127, 0, 19, 102},
105     {8744, 0, 129, 127, 0, 19, 102}, {8745, 0, 129, 127, 0, 19, 102},
106     {8746, 0, 129, 127, 0, 19, 114}, {8747, 0, 129, 127, 0, 19, 114},
107     {8748, 0, 129, 127, 0, 19, 102}, {8749, 0, 129, 127, 0, 19, 102},
108     {8750, 0, 129, 127, 0, 19, 102}, {8751, 0, 129, 127, 0, 19, 102},
109     {8752, 0, 129, 127, 0, 19, 102}, {8753, 0, 129, 127, 0, 19, 102},
110     {8754, 0, 129, 127, 0, 19, 102}, {8755, 0, 129, 127, 0, 19, 102},
111     {8756, 0, 129, 127, 0, 19, 102}, {8757, 0, 129, 127, 0, 19, 102},
112     {8758, 0, 129, 127, 0, 19, 102}, {8759, 0, 129, 127, 0, 19, 102},
113     {8760, 0, 129, 127, 0, 19, 102}, {8761, 0, 129, 127, 0, 19, 102},
114     {8762, 0, 129, 127, 0, 19, 102}, {8763, 0, 129, 127, 0, 19, 102},
115     {8764, 0, 129, 127, 0, 19, 102}, {8765, 0, 129, 127, 0, 19, 102},
116     {8766, 0, 129, 127, 0, 19, 102}, {8767, 0, 129, 127, 0, 19, 102},
117     {8768, 0, 129, 127, 0, 19, 102}, {8769, 0, 129, 127, 0, 19, 102},
118     {8770, 0, 129, 127, 0, 19, 102}, {8771, 0, 129, 127, 0, 19, 102},
119     {8772, 0, 129, 127, 0, 19, 102}, {8773, 0, 129, 127, 0, 19, 102},
120     {8774, 0, 129, 127, 0, 19, 102}, {8775, 0, 129, 127, 0, 19, 102},
121     {8776, 0, 129, 127, 0, 19, 102}, {8777, 0, 129, 127, 0, 19, 102},
122     {8778, 0, 129, 127, 0, 19, 102}, {8779, 0, 129, 127, 0, 19, 114},
123     {8780, 0, 129, 127, 0, 19, 108}, {8781, 0, 129, 127, 0, 19, 114},
124     {8782, 0, 129, 127, 0, 13, 114}, {8783, 0, 129, 127, 0, 19, 108},
125     {8784, 0, 129, 127, 0, 13, 114}, {8785, 0, 129, 127, 0, 19, 108},
126     {8786, 0, 129, 127, 0, 19, 108}, {8787, 0, 129, 127, 0, 19, 108},
127     {8788, 0, 129, 127, 0, 19, 108}, {8789, 0, 129, 127, 0, 19, 108},
128     {8790, 0, 129, 127, 0, 19, 108}, {8791, 0, 129, 127, 0, 19, 108},
129     {8792, 0, 129, 127, 0, 19, 108}, {8793, 0, 129, 127, 0, 19, 108},
130     {8794, 0, 129, 127, 0, 19, 108}, {8795, 0, 129, 127, 0, 19, 108},
131     {8796, 0, 129, 127, 0, 19, 108}, {8797, 0, 129, 127, 0, 19, 108},
132     {8798, 0, 129, 127, 0, 19, 108}, {8799, 0, 129, 127, 0, 19, 108},
133     {8800, 0, 129, 127, 0, 19, 108}, {8801, 0, 129, 127, 0, 19, 108},
134     {8802, 0, 129, 127, 0, 19, 108}, {8803, 0, 129, 127, 0, 19, 108},
135     {8804, 0, 129, 127, 0, 19, 108}, {8805, 0, 129, 127, 0, 19, 108},
136     {8806, 0, 129, 127, 0, 19, 108}, {8807, 0, 129, 127, 0, 19, 108},
137     {8808, 0, 129, 127, 0, 19, 108}, {8809, 0, 129, 127, 0, 19, 108},
138     {8810, 0, 129, 127, 0, 19, 108}, {8811, 0, 129, 127, 0, 19, 114},
139     {8812, 0, 129, 127, 0, 19, 102}, {8813, 0, 129, 127, 0, 19, 114},
140     {8814, 0, 129, 127, 0, 76, 102}, {8815, 0, 129, 127, 0, 13, 121},
141     {8816, 0, 129, 127, 0, 19, 114}, {8817, 0, 129, 127, 0, 19, 127},
142     {8818, 0, 129, 127, 0, 19, 114}, {8819, 0, 129, 127, 0, 218, 108},
143 };
144 
145 #if !BUILDFLAG(IS_WIN)
146 
IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset)147 bool IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset) {
148   switch (charset) {
149     case CIDSET_GB1:
150     case CIDSET_CNS1:
151     case CIDSET_JAPAN1:
152     case CIDSET_KOREA1:
153       return true;
154 
155     default:
156       return false;
157   }
158 }
159 
EmbeddedUnicodeFromCharcode(const fxcmap::CMap * pEmbedMap,CIDSet charset,uint32_t charcode)160 wchar_t EmbeddedUnicodeFromCharcode(const fxcmap::CMap* pEmbedMap,
161                                     CIDSet charset,
162                                     uint32_t charcode) {
163   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
164     return 0;
165 
166   uint16_t cid = fxcmap::CIDFromCharCode(pEmbedMap, charcode);
167   if (!cid)
168     return 0;
169 
170   pdfium::span<const uint16_t> map =
171       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
172   return cid < map.size() ? map[cid] : 0;
173 }
174 
EmbeddedCharcodeFromUnicode(const fxcmap::CMap * pEmbedMap,CIDSet charset,wchar_t unicode)175 uint32_t EmbeddedCharcodeFromUnicode(const fxcmap::CMap* pEmbedMap,
176                                      CIDSet charset,
177                                      wchar_t unicode) {
178   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
179     return 0;
180 
181   pdfium::span<const uint16_t> map =
182       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
183   for (uint32_t i = 0; i < map.size(); ++i) {
184     if (map[i] == unicode) {
185       uint32_t charCode = fxcmap::CharCodeFromCID(pEmbedMap, i);
186       if (charCode)
187         return charCode;
188     }
189   }
190   return 0;
191 }
192 
193 #endif  // !BUILDFLAG(IS_WIN)
194 
UseCIDCharmap(const RetainPtr<CFX_Face> & face,CIDCoding coding)195 void UseCIDCharmap(const RetainPtr<CFX_Face>& face, CIDCoding coding) {
196   fxge::FontEncoding encoding;
197   switch (coding) {
198     case CIDCoding::kGB:
199       encoding = fxge::FontEncoding::kGB2312;
200       break;
201     case CIDCoding::kBIG5:
202       encoding = fxge::FontEncoding::kBig5;
203       break;
204     case CIDCoding::kJIS:
205       encoding = fxge::FontEncoding::kSjis;
206       break;
207     case CIDCoding::kKOREA:
208       encoding = fxge::FontEncoding::kJohab;
209       break;
210     default:
211       encoding = fxge::FontEncoding::kUnicode;
212   }
213   bool result = face->SelectCharMap(encoding);
214   if (!result) {
215     result = face->SelectCharMap(fxge::FontEncoding::kUnicode);
216   }
217   if (!result && face->GetCharMapCount()) {
218     face->SetCharMapByIndex(0);
219   }
220 }
221 
LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,std::vector<int> * result,int nElements)222 void LoadMetricsArray(RetainPtr<const CPDF_Array> pArray,
223                       std::vector<int>* result,
224                       int nElements) {
225   int width_status = 0;
226   int iCurElement = 0;
227   int first_code = 0;
228   int last_code = 0;
229   for (size_t i = 0; i < pArray->size(); i++) {
230     RetainPtr<const CPDF_Object> pObj = pArray->GetDirectObjectAt(i);
231     if (!pObj)
232       continue;
233 
234     const CPDF_Array* pObjArray = pObj->AsArray();
235     if (pObjArray) {
236       if (width_status != 1)
237         return;
238       if (first_code > std::numeric_limits<int>::max() -
239                            fxcrt::CollectionSize<int>(*pObjArray)) {
240         width_status = 0;
241         continue;
242       }
243 
244       for (size_t j = 0; j < pObjArray->size(); j += nElements) {
245         result->push_back(first_code);
246         result->push_back(first_code);
247         for (int k = 0; k < nElements; k++)
248           result->push_back(pObjArray->GetIntegerAt(j + k));
249         first_code++;
250       }
251       width_status = 0;
252     } else {
253       if (width_status == 0) {
254         first_code = pObj->GetInteger();
255         width_status = 1;
256       } else if (width_status == 1) {
257         last_code = pObj->GetInteger();
258         width_status = 2;
259         iCurElement = 0;
260       } else {
261         if (!iCurElement) {
262           result->push_back(first_code);
263           result->push_back(last_code);
264         }
265         result->push_back(pObj->GetInteger());
266         iCurElement++;
267         if (iCurElement == nElements)
268           width_status = 0;
269       }
270     }
271   }
272 }
273 
274 }  // namespace
275 
CPDF_CIDFont(CPDF_Document * pDocument,RetainPtr<CPDF_Dictionary> pFontDict)276 CPDF_CIDFont::CPDF_CIDFont(CPDF_Document* pDocument,
277                            RetainPtr<CPDF_Dictionary> pFontDict)
278     : CPDF_Font(pDocument, std::move(pFontDict)) {
279   for (size_t i = 0; i < std::size(m_CharBBox); ++i)
280     m_CharBBox[i] = FX_RECT(-1, -1, -1, -1);
281 }
282 
283 CPDF_CIDFont::~CPDF_CIDFont() = default;
284 
IsCIDFont() const285 bool CPDF_CIDFont::IsCIDFont() const {
286   return true;
287 }
288 
AsCIDFont() const289 const CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() const {
290   return this;
291 }
292 
AsCIDFont()293 CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() {
294   return this;
295 }
296 
CIDFromCharCode(uint32_t charcode) const297 uint16_t CPDF_CIDFont::CIDFromCharCode(uint32_t charcode) const {
298   return m_pCMap ? m_pCMap->CIDFromCharCode(charcode)
299                  : static_cast<uint16_t>(charcode);
300 }
301 
IsVertWriting() const302 bool CPDF_CIDFont::IsVertWriting() const {
303   return m_pCMap && m_pCMap->IsVertWriting();
304 }
305 
UnicodeFromCharCode(uint32_t charcode) const306 WideString CPDF_CIDFont::UnicodeFromCharCode(uint32_t charcode) const {
307   WideString str = CPDF_Font::UnicodeFromCharCode(charcode);
308   if (!str.IsEmpty())
309     return str;
310   wchar_t ret = GetUnicodeFromCharCode(charcode);
311   return ret ? WideString(ret) : WideString();
312 }
313 
GetUnicodeFromCharCode(uint32_t charcode) const314 wchar_t CPDF_CIDFont::GetUnicodeFromCharCode(uint32_t charcode) const {
315   switch (m_pCMap->GetCoding()) {
316     case CIDCoding::kUCS2:
317     case CIDCoding::kUTF16:
318       return static_cast<wchar_t>(charcode);
319     case CIDCoding::kCID:
320       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
321         return 0;
322       return m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
323     default:
324       break;
325   }
326   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
327     return m_pCID2UnicodeMap->UnicodeFromCID(CIDFromCharCode(charcode));
328 
329 #if BUILDFLAG(IS_WIN)
330   uint8_t sequence[2] = {};
331   const int charsize = charcode < 256 ? 1 : 2;
332   if (charsize == 1) {
333     sequence[0] = charcode;
334   } else {
335     sequence[0] = charcode / 256;
336     sequence[1] = charcode % 256;
337   }
338   wchar_t unicode;
339   size_t ret = FX_MultiByteToWideChar(
340       kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
341       ByteStringView(pdfium::make_span(sequence).first(charsize)),
342       pdfium::span_from_ref(unicode));
343   return ret == 1 ? unicode : 0;
344 #else
345   if (!m_pCMap->GetEmbedMap())
346     return 0;
347   return EmbeddedUnicodeFromCharcode(m_pCMap->GetEmbedMap(),
348                                      m_pCMap->GetCharset(), charcode);
349 #endif
350 }
351 
CharCodeFromUnicode(wchar_t unicode) const352 uint32_t CPDF_CIDFont::CharCodeFromUnicode(wchar_t unicode) const {
353   uint32_t charcode = CPDF_Font::CharCodeFromUnicode(unicode);
354   if (charcode)
355     return charcode;
356 
357   switch (m_pCMap->GetCoding()) {
358     case CIDCoding::kUNKNOWN:
359       return 0;
360     case CIDCoding::kUCS2:
361     case CIDCoding::kUTF16:
362       return unicode;
363     case CIDCoding::kCID: {
364       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
365         return 0;
366       uint32_t cid = 0;
367       while (cid < 65536) {
368         wchar_t this_unicode =
369             m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(cid));
370         if (this_unicode == unicode)
371           return cid;
372         cid++;
373       }
374       break;
375     }
376     default:
377       break;
378   }
379 
380   if (unicode < 0x80)
381     return static_cast<uint32_t>(unicode);
382   if (m_pCMap->GetCoding() == CIDCoding::kCID)
383     return 0;
384 #if BUILDFLAG(IS_WIN)
385   uint8_t buffer[32];
386   size_t ret = FX_WideCharToMultiByte(
387       kCharsetCodePages[static_cast<size_t>(m_pCMap->GetCoding())],
388       WideStringView(unicode),
389       pdfium::as_writable_chars(pdfium::make_span(buffer).first(4u)));
390   if (ret == 1)
391     return buffer[0];
392   if (ret == 2)
393     return buffer[0] * 256 + buffer[1];
394 #else
395   if (m_pCMap->GetEmbedMap()) {
396     return EmbeddedCharcodeFromUnicode(m_pCMap->GetEmbedMap(),
397                                        m_pCMap->GetCharset(), unicode);
398   }
399 #endif
400   return 0;
401 }
402 
Load()403 bool CPDF_CIDFont::Load() {
404   if (m_pFontDict->GetByteStringFor("Subtype") == "TrueType") {
405     LoadGB2312();
406     return true;
407   }
408 
409   RetainPtr<const CPDF_Array> pFonts =
410       m_pFontDict->GetArrayFor("DescendantFonts");
411   if (!pFonts || pFonts->size() != 1)
412     return false;
413 
414   RetainPtr<const CPDF_Dictionary> pCIDFontDict = pFonts->GetDictAt(0);
415   if (!pCIDFontDict)
416     return false;
417 
418   m_BaseFontName = pCIDFontDict->GetByteStringFor("BaseFont");
419   if ((m_BaseFontName == "CourierStd" || m_BaseFontName == "CourierStd-Bold" ||
420        m_BaseFontName == "CourierStd-BoldOblique" ||
421        m_BaseFontName == "CourierStd-Oblique") &&
422       !IsEmbedded()) {
423     m_bAdobeCourierStd = true;
424   }
425 
426   RetainPtr<const CPDF_Object> pEncoding =
427       m_pFontDict->GetDirectObjectFor("Encoding");
428   if (!pEncoding)
429     return false;
430 
431   ByteString subtype = pCIDFontDict->GetByteStringFor("Subtype");
432   m_FontType =
433       subtype == "CIDFontType0" ? CIDFontType::kType1 : CIDFontType::kTrueType;
434 
435   if (!pEncoding->IsName() && !pEncoding->IsStream())
436     return false;
437 
438   auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
439   const CPDF_Stream* pEncodingStream = pEncoding->AsStream();
440   if (pEncodingStream) {
441     auto pAcc =
442         pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pEncodingStream));
443     pAcc->LoadAllDataFiltered();
444     pdfium::span<const uint8_t> span = pAcc->GetSpan();
445     m_pCMap = pdfium::MakeRetain<CPDF_CMap>(span);
446   } else {
447     DCHECK(pEncoding->IsName());
448     ByteString cmap = pEncoding->GetString();
449     m_pCMap = pFontGlobals->GetPredefinedCMap(cmap);
450   }
451 
452   RetainPtr<const CPDF_Dictionary> pFontDesc =
453       pCIDFontDict->GetDictFor("FontDescriptor");
454   if (pFontDesc)
455     LoadFontDescriptor(pFontDesc.Get());
456 
457   m_Charset = m_pCMap->GetCharset();
458   if (m_Charset == CIDSET_UNKNOWN) {
459     RetainPtr<const CPDF_Dictionary> pCIDInfo =
460         pCIDFontDict->GetDictFor("CIDSystemInfo");
461     if (pCIDInfo) {
462       m_Charset = CPDF_CMapParser::CharsetFromOrdering(
463           pCIDInfo->GetByteStringFor("Ordering").AsStringView());
464     }
465   }
466   if (m_Charset != CIDSET_UNKNOWN) {
467     m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
468   }
469   RetainPtr<CFX_Face> face = m_Font.GetFace();
470   if (face) {
471     if (m_FontType == CIDFontType::kType1) {
472       face->SelectCharMap(fxge::FontEncoding::kUnicode);
473     } else {
474       UseCIDCharmap(face, m_pCMap->GetCoding());
475     }
476   }
477   m_DefaultWidth = pCIDFontDict->GetIntegerFor("DW", 1000);
478   RetainPtr<const CPDF_Array> pWidthArray = pCIDFontDict->GetArrayFor("W");
479   if (pWidthArray)
480     LoadMetricsArray(std::move(pWidthArray), &m_WidthList, 1);
481 
482   if (!IsEmbedded())
483     LoadSubstFont();
484 
485   RetainPtr<const CPDF_Object> pmap =
486       pCIDFontDict->GetDirectObjectFor("CIDToGIDMap");
487   if (pmap) {
488     RetainPtr<const CPDF_Stream> pMapStream(pmap->AsStream());
489     if (pMapStream) {
490       m_pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pMapStream));
491       m_pStreamAcc->LoadAllDataFiltered();
492     } else if (m_pFontFile && pmap->IsName() &&
493                pmap->GetString() == "Identity") {
494       m_bCIDIsGID = true;
495     }
496   }
497 
498   CheckFontMetrics();
499   if (IsVertWriting()) {
500     RetainPtr<const CPDF_Array> pWidth2Array = pCIDFontDict->GetArrayFor("W2");
501     if (pWidth2Array)
502       LoadMetricsArray(std::move(pWidth2Array), &m_VertMetrics, 3);
503 
504     RetainPtr<const CPDF_Array> pDefaultArray =
505         pCIDFontDict->GetArrayFor("DW2");
506     if (pDefaultArray) {
507       m_DefaultVY = pDefaultArray->GetIntegerAt(0);
508       m_DefaultW1 = pDefaultArray->GetIntegerAt(1);
509     }
510   }
511 
512   // TODO(thestig): Better identify font types and identify more font types.
513   if (m_FontType == CIDFontType::kTrueType && IsEmbedded())
514     m_Font.SetFontType(CFX_Font::FontType::kCIDTrueType);
515 
516   return true;
517 }
518 
GetCharBBox(uint32_t charcode)519 FX_RECT CPDF_CIDFont::GetCharBBox(uint32_t charcode) {
520   if (charcode < 256 && m_CharBBox[charcode].right != -1)
521     return m_CharBBox[charcode];
522 
523   FX_RECT rect;
524   bool bVert = false;
525   int glyph_index = GlyphFromCharCode(charcode, &bVert);
526   RetainPtr<CFX_Face> face = m_Font.GetFace();
527   if (face) {
528     rect = face->GetCharBBox(charcode, glyph_index);
529   }
530   if (!m_pFontFile && m_Charset == CIDSET_JAPAN1) {
531     uint16_t cid = CIDFromCharCode(charcode);
532     const CIDTransform* pTransform = GetCIDTransform(cid);
533     if (pTransform && !bVert) {
534       CFX_Matrix matrix(CIDTransformToFloat(pTransform->a),
535                         CIDTransformToFloat(pTransform->b),
536                         CIDTransformToFloat(pTransform->c),
537                         CIDTransformToFloat(pTransform->d),
538                         CIDTransformToFloat(pTransform->e) * 1000,
539                         CIDTransformToFloat(pTransform->f) * 1000);
540       rect = matrix.TransformRect(CFX_FloatRect(rect)).GetOuterRect();
541     }
542   }
543   if (charcode < 256)
544     m_CharBBox[charcode] = rect;
545 
546   return rect;
547 }
548 
GetCharWidthF(uint32_t charcode)549 int CPDF_CIDFont::GetCharWidthF(uint32_t charcode) {
550   if (charcode < 0x80 && m_bAnsiWidthsFixed) {
551     return (charcode >= 32 && charcode < 127) ? 500 : 0;
552   }
553   uint16_t cid = CIDFromCharCode(charcode);
554   auto lhv_span =
555       fxcrt::reinterpret_span<const LowHighVal>(pdfium::make_span(m_WidthList));
556   for (const auto& lhv : lhv_span) {
557     if (IsMetricForCID(lhv, cid)) {
558       return lhv.val;
559     }
560   }
561   return m_DefaultWidth;
562 }
563 
GetVertWidth(uint16_t cid) const564 int16_t CPDF_CIDFont::GetVertWidth(uint16_t cid) const {
565   auto lhvxy_span = fxcrt::reinterpret_span<const LowHighValXY>(
566       pdfium::make_span(m_VertMetrics));
567   for (const auto& lhvxy : lhvxy_span) {
568     if (IsMetricForCID(lhvxy, cid)) {
569       return lhvxy.val;
570     }
571   }
572   return m_DefaultW1;
573 }
574 
GetVertOrigin(uint16_t cid) const575 CFX_Point16 CPDF_CIDFont::GetVertOrigin(uint16_t cid) const {
576   auto lhvxy_span = fxcrt::reinterpret_span<const LowHighValXY>(
577       pdfium::make_span(m_VertMetrics));
578   for (const auto& lhvxy : lhvxy_span) {
579     if (IsMetricForCID(lhvxy, cid)) {
580       return {static_cast<int16_t>(lhvxy.x), static_cast<int16_t>(lhvxy.y)};
581     }
582   }
583   int width = m_DefaultWidth;
584   auto lhv_span =
585       fxcrt::reinterpret_span<const LowHighVal>(pdfium::make_span(m_WidthList));
586   for (const auto& lhv : lhv_span) {
587     if (IsMetricForCID(lhv, cid)) {
588       width = lhv.val;
589       break;
590     }
591   }
592   return {static_cast<int16_t>(width / 2), m_DefaultVY};
593 }
594 
GetGlyphIndex(uint32_t unicode,bool * pVertGlyph)595 int CPDF_CIDFont::GetGlyphIndex(uint32_t unicode, bool* pVertGlyph) {
596   if (pVertGlyph)
597     *pVertGlyph = false;
598 
599   int index = m_Font.GetFace()->GetCharIndex(unicode);
600   if (unicode == pdfium::unicode::kBoxDrawingsLightVerical)
601     return index;
602 
603   if (!index || !IsVertWriting())
604     return index;
605 
606   if (m_pTTGSUBTable)
607     return GetVerticalGlyph(index, pVertGlyph);
608 
609   static constexpr uint32_t kGsubTag =
610       CFX_FontMapper::MakeTag('G', 'S', 'U', 'B');
611   RetainPtr<CFX_Face> face = m_Font.GetFace();
612   size_t length = face->GetSfntTable(kGsubTag, {});
613   if (!length) {
614     return index;
615   }
616 
617   auto sub_data = FixedSizeDataVector<uint8_t>::Uninit(length);
618   if (!face->GetSfntTable(kGsubTag, sub_data.span())) {
619     return index;
620   }
621 
622   // CFX_CTTGSUBTable parses the data and stores all the values in its structs.
623   // It does not store pointers into `sub_data`.
624   m_pTTGSUBTable = std::make_unique<CFX_CTTGSUBTable>(sub_data.span());
625   return GetVerticalGlyph(index, pVertGlyph);
626 }
627 
GetVerticalGlyph(int index,bool * pVertGlyph)628 int CPDF_CIDFont::GetVerticalGlyph(int index, bool* pVertGlyph) {
629   uint32_t vindex = m_pTTGSUBTable->GetVerticalGlyph(index);
630   if (!vindex)
631     return index;
632 
633   index = vindex;
634   if (pVertGlyph)
635     *pVertGlyph = true;
636   return index;
637 }
638 
// Resolves `charcode` to a glyph index.
//
// `pVertGlyph`, when non-null, is reset to false on entry and is only set to
// true if GetGlyphIndex() substitutes a vertical glyph.
//
// Returns -1 on several failure paths. Note that some paths return the CID,
// the raw charcode or the Unicode value itself rather than an index obtained
// from the face.
GlyphFromCharCode(uint32_t charcode,bool * pVertGlyph)639 int CPDF_CIDFont::GlyphFromCharCode(uint32_t charcode, bool* pVertGlyph) {
640   if (pVertGlyph)
641     *pVertGlyph = false;
642 
  // Path 1: there is no font file, and either no CIDToGIDMap stream was
  // loaded or a CID-to-Unicode map exists. The glyph is found via Unicode.
643   if (!m_pFontFile && (!m_pStreamAcc || m_pCID2UnicodeMap)) {
644     uint16_t cid = CIDFromCharCode(charcode);
645     wchar_t unicode = 0;
646     if (m_bCIDIsGID) {
647 #if BUILDFLAG(IS_APPLE)
648       if (FontStyleIsSymbolic(m_Flags))
649         return cid;
650 
651       WideString uni_str = UnicodeFromCharCode(charcode);
652       if (uni_str.IsEmpty())
653         return cid;
654 
655       unicode = uni_str[0];
656 #else
657       return cid;
658 #endif
659     } else {
      // Try progressively broader sources for a Unicode value.
660       if (cid && m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded())
661         unicode = m_pCID2UnicodeMap->UnicodeFromCID(cid);
662       if (unicode == 0)
663         unicode = GetUnicodeFromCharCode(charcode);
664       if (unicode == 0) {
665         WideString unicode_str = UnicodeFromCharCode(charcode);
666         if (!unicode_str.IsEmpty())
667           unicode = unicode_str[0];
668       }
669     }
    // No Unicode value was found. Unless the Adobe CourierStd flag is set,
    // fall back to the raw charcode (-1 for charcode 0).
670     if (unicode == 0) {
671       if (!m_bAdobeCourierStd)
672         return charcode ? static_cast<int>(charcode) : -1;
673 
      // NOTE(review): the +31 offset is presumably specific to how CourierStd
      // charcodes line up with the base encoding - confirm.
674       charcode += 31;
675       RetainPtr<CFX_Face> face = m_Font.GetFace();
676       bool bMSUnicode = UseTTCharmapMSUnicode(face);
677       bool bMacRoman = !bMSUnicode && UseTTCharmapMacRoman(face);
678       FontEncoding base_encoding = FontEncoding::kStandard;
679       if (bMSUnicode)
680         base_encoding = FontEncoding::kWinAnsi;
681       else if (bMacRoman)
682         base_encoding = FontEncoding::kMacRoman;
683       const char* name =
684           GetAdobeCharName(base_encoding, std::vector<ByteString>(), charcode);
685       if (!name)
686         return charcode ? static_cast<int>(charcode) : -1;
687 
688       int index = 0;
689       uint16_t name_unicode = UnicodeFromAdobeName(name);
690       if (!name_unicode)
691         return charcode ? static_cast<int>(charcode) : -1;
692 
693       if (base_encoding == FontEncoding::kStandard) {
694         return face->GetCharIndex(name_unicode);
695       }
696 
697       if (base_encoding == FontEncoding::kWinAnsi) {
698         index = face->GetCharIndex(name_unicode);
699       } else {
700         DCHECK_EQ(base_encoding, FontEncoding::kMacRoman);
701         uint32_t maccode = CharCodeFromUnicodeForEncoding(
702             fxge::FontEncoding::kAppleRoman, name_unicode);
703         index =
704             maccode ? face->GetCharIndex(maccode) : face->GetNameIndex(name);
705       }
706       if (index == 0 || index == 0xffff)
707         return charcode ? static_cast<int>(charcode) : -1;
708       return index;
709     }
    // Japan1 only: backslash is replaced with slash and, except on Apple
    // builds, 0xA5 is replaced with 0x5C.
710     if (m_Charset == CIDSET_JAPAN1) {
711       if (unicode == '\\') {
712         unicode = '/';
713 #if !BUILDFLAG(IS_APPLE)
714       } else if (unicode == 0xa5) {
715         unicode = 0x5c;
716 #endif
717       }
718     }
719 
720     RetainPtr<CFX_Face> face = m_Font.GetFace();
721     if (!face) {
722       return unicode;
723     }
724 
    // If the face has no Unicode charmap, select the first charmap whose
    // encoding yields a non-zero code for the charcode. If none does, select
    // charmap 0 and use the raw charcode.
725     size_t num_charmaps = face->GetCharMapCount();
726     if (!face->SelectCharMap(fxge::FontEncoding::kUnicode)) {
727       size_t i;
728       for (i = 0; i < num_charmaps; i++) {
729         uint32_t ret = CharCodeFromUnicodeForEncoding(
730             face->GetCharMapEncodingByIndex(i), static_cast<wchar_t>(charcode));
731         if (ret == 0)
732           continue;
733         face->SetCharMapByIndex(i);
734         unicode = static_cast<wchar_t>(ret);
735         break;
736       }
737       if (i == num_charmaps && i) {
738         face->SetCharMapByIndex(0);
739         unicode = static_cast<wchar_t>(charcode);
740       }
741     }
742     if (num_charmaps) {
743       int index = GetGlyphIndex(unicode, pVertGlyph);
744       return index != 0 ? index : -1;
745     }
746     return unicode;
747   }
748 
  // Path 2: a font file is present, or a CIDToGIDMap stream was loaded
  // without a CID-to-Unicode map.
749   RetainPtr<CFX_Face> face = m_Font.GetFace();
750   if (!face) {
751     return -1;
752   }
753 
754   uint16_t cid = CIDFromCharCode(charcode);
755   if (!m_pStreamAcc) {
    // No CIDToGIDMap stream: in the cases below the CID is used directly.
756     if (m_FontType == CIDFontType::kType1) {
757       return cid;
758     }
759     if (m_pFontFile && m_pCMap->IsDirectCharcodeToCIDTableIsEmpty()) {
760       return cid;
761     }
762     if (m_pCMap->GetCoding() == CIDCoding::kUNKNOWN) {
763       return cid;
764     }
765 
766     std::optional<fxge::FontEncoding> charmap =
767         face->GetCurrentCharMapEncoding();
768     if (!charmap.has_value()) {
769       return cid;
770     }
771 
    // With a Unicode charmap selected, look the glyph up by Unicode value
    // instead of by charcode.
772     if (charmap.value() == fxge::FontEncoding::kUnicode) {
773       WideString unicode_str = UnicodeFromCharCode(charcode);
774       if (unicode_str.IsEmpty())
775         return -1;
776 
777       charcode = unicode_str[0];
778     }
779     return GetGlyphIndex(charcode, pVertGlyph);
780   }
  // CIDToGIDMap stream: two bytes per CID, high byte first. CIDs beyond the
  // end of the stream have no glyph.
781   uint32_t byte_pos = cid * 2;
782   if (byte_pos + 2 > m_pStreamAcc->GetSize())
783     return -1;
784 
785   pdfium::span<const uint8_t> span = m_pStreamAcc->GetSpan().subspan(byte_pos);
786   return span[0] * 256 + span[1];
787 }
788 
GetNextChar(ByteStringView pString,size_t * pOffset) const789 uint32_t CPDF_CIDFont::GetNextChar(ByteStringView pString,
790                                    size_t* pOffset) const {
791   return m_pCMap->GetNextChar(pString, pOffset);
792 }
793 
GetCharSize(uint32_t charcode) const794 int CPDF_CIDFont::GetCharSize(uint32_t charcode) const {
795   return m_pCMap->GetCharSize(charcode);
796 }
797 
CountChar(ByteStringView pString) const798 size_t CPDF_CIDFont::CountChar(ByteStringView pString) const {
799   return m_pCMap->CountChar(pString);
800 }
801 
AppendChar(ByteString * str,uint32_t charcode) const802 void CPDF_CIDFont::AppendChar(ByteString* str, uint32_t charcode) const {
803   m_pCMap->AppendChar(str, charcode);
804 }
805 
IsUnicodeCompatible() const806 bool CPDF_CIDFont::IsUnicodeCompatible() const {
807   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
808     return true;
809   return m_pCMap->GetCoding() != CIDCoding::kUNKNOWN;
810 }
811 
LoadSubstFont()812 void CPDF_CIDFont::LoadSubstFont() {
813   FX_SAFE_INT32 safeStemV(m_StemV);
814   safeStemV *= 5;
815   m_Font.LoadSubst(m_BaseFontName, m_FontType == CIDFontType::kTrueType,
816                    m_Flags, safeStemV.ValueOrDefault(FXFONT_FW_NORMAL),
817                    m_ItalicAngle, kCharsetCodePages[m_Charset],
818                    IsVertWriting());
819 }
820 
821 // static
CIDTransformToFloat(uint8_t ch)822 float CPDF_CIDFont::CIDTransformToFloat(uint8_t ch) {
823   return (ch < 128 ? ch : ch - 255) * (1.0f / 127);
824 }
825 
LoadGB2312()826 void CPDF_CIDFont::LoadGB2312() {
827   m_BaseFontName = m_pFontDict->GetByteStringFor("BaseFont");
828   m_Charset = CIDSET_GB1;
829 
830   auto* pFontGlobals = CPDF_FontGlobals::GetInstance();
831   m_pCMap = pFontGlobals->GetPredefinedCMap("GBK-EUC-H");
832   m_pCID2UnicodeMap = pFontGlobals->GetCID2UnicodeMap(m_Charset);
833   RetainPtr<const CPDF_Dictionary> pFontDesc =
834       m_pFontDict->GetDictFor("FontDescriptor");
835   if (pFontDesc)
836     LoadFontDescriptor(pFontDesc.Get());
837 
838   if (!IsEmbedded())
839     LoadSubstFont();
840   CheckFontMetrics();
841   m_bAnsiWidthsFixed = true;
842 }
843 
GetCIDTransform(uint16_t cid) const844 const CIDTransform* CPDF_CIDFont::GetCIDTransform(uint16_t cid) const {
845   if (m_Charset != CIDSET_JAPAN1 || m_pFontFile)
846     return nullptr;
847 
848   const auto* pBegin = std::begin(kJapan1VerticalCIDs);
849   const auto* pEnd = std::end(kJapan1VerticalCIDs);
850   const auto* pTransform = std::lower_bound(
851       pBegin, pEnd, cid,
852       [](const CIDTransform& entry, uint16_t cid) { return entry.cid < cid; });
853 
854   return pTransform < pEnd && cid == pTransform->cid ? pTransform : nullptr;
855 }
856