• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/font_int.h"
8 
9 #include <memory>
10 #include <utility>
11 
12 #include "core/fpdfapi/cmaps/cmap_int.h"
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/font/ttgsubtable.h"
15 #include "core/fpdfapi/page/cpdf_pagemodule.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
19 #include "core/fxcrt/fx_ext.h"
20 #include "core/fxge/fx_freetype.h"
21 #include "third_party/base/logging.h"
22 #include "third_party/base/stl_util.h"
23 
24 namespace {
25 
26 const FX_CHAR* const g_CharsetNames[CIDSET_NUM_SETS] = {
27     nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
28 
29 class CPDF_PredefinedCMap {
30  public:
31   const FX_CHAR* m_pName;
32   CIDSet m_Charset;
33   CIDCoding m_Coding;
34   CPDF_CMap::CodingScheme m_CodingScheme;
35   uint8_t m_LeadingSegCount;
36   uint8_t m_LeadingSegs[4];
37 };
38 
39 const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
40     {"GB-EUC",
41      CIDSET_GB1,
42      CIDCODING_GB,
43      CPDF_CMap::MixedTwoBytes,
44      1,
45      {0xa1, 0xfe}},
46     {"GBpc-EUC",
47      CIDSET_GB1,
48      CIDCODING_GB,
49      CPDF_CMap::MixedTwoBytes,
50      1,
51      {0xa1, 0xfc}},
52     {"GBK-EUC",
53      CIDSET_GB1,
54      CIDCODING_GB,
55      CPDF_CMap::MixedTwoBytes,
56      1,
57      {0x81, 0xfe}},
58     {"GBKp-EUC",
59      CIDSET_GB1,
60      CIDCODING_GB,
61      CPDF_CMap::MixedTwoBytes,
62      1,
63      {0x81, 0xfe}},
64     {"GBK2K-EUC",
65      CIDSET_GB1,
66      CIDCODING_GB,
67      CPDF_CMap::MixedTwoBytes,
68      1,
69      {0x81, 0xfe}},
70     {"GBK2K",
71      CIDSET_GB1,
72      CIDCODING_GB,
73      CPDF_CMap::MixedTwoBytes,
74      1,
75      {0x81, 0xfe}},
76     {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
77     {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
78     {"B5pc",
79      CIDSET_CNS1,
80      CIDCODING_BIG5,
81      CPDF_CMap::MixedTwoBytes,
82      1,
83      {0xa1, 0xfc}},
84     {"HKscs-B5",
85      CIDSET_CNS1,
86      CIDCODING_BIG5,
87      CPDF_CMap::MixedTwoBytes,
88      1,
89      {0x88, 0xfe}},
90     {"ETen-B5",
91      CIDSET_CNS1,
92      CIDCODING_BIG5,
93      CPDF_CMap::MixedTwoBytes,
94      1,
95      {0xa1, 0xfe}},
96     {"ETenms-B5",
97      CIDSET_CNS1,
98      CIDCODING_BIG5,
99      CPDF_CMap::MixedTwoBytes,
100      1,
101      {0xa1, 0xfe}},
102     {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
103     {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
104     {"83pv-RKSJ",
105      CIDSET_JAPAN1,
106      CIDCODING_JIS,
107      CPDF_CMap::MixedTwoBytes,
108      2,
109      {0x81, 0x9f, 0xe0, 0xfc}},
110     {"90ms-RKSJ",
111      CIDSET_JAPAN1,
112      CIDCODING_JIS,
113      CPDF_CMap::MixedTwoBytes,
114      2,
115      {0x81, 0x9f, 0xe0, 0xfc}},
116     {"90msp-RKSJ",
117      CIDSET_JAPAN1,
118      CIDCODING_JIS,
119      CPDF_CMap::MixedTwoBytes,
120      2,
121      {0x81, 0x9f, 0xe0, 0xfc}},
122     {"90pv-RKSJ",
123      CIDSET_JAPAN1,
124      CIDCODING_JIS,
125      CPDF_CMap::MixedTwoBytes,
126      2,
127      {0x81, 0x9f, 0xe0, 0xfc}},
128     {"Add-RKSJ",
129      CIDSET_JAPAN1,
130      CIDCODING_JIS,
131      CPDF_CMap::MixedTwoBytes,
132      2,
133      {0x81, 0x9f, 0xe0, 0xfc}},
134     {"EUC",
135      CIDSET_JAPAN1,
136      CIDCODING_JIS,
137      CPDF_CMap::MixedTwoBytes,
138      2,
139      {0x8e, 0x8e, 0xa1, 0xfe}},
140     {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
141     {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
142     {"Ext-RKSJ",
143      CIDSET_JAPAN1,
144      CIDCODING_JIS,
145      CPDF_CMap::MixedTwoBytes,
146      2,
147      {0x81, 0x9f, 0xe0, 0xfc}},
148     {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
149     {"UniJIS-UCS2-HW",
150      CIDSET_JAPAN1,
151      CIDCODING_UCS2,
152      CPDF_CMap::TwoBytes,
153      0,
154      {}},
155     {"UniJIS-UTF16",
156      CIDSET_JAPAN1,
157      CIDCODING_UTF16,
158      CPDF_CMap::TwoBytes,
159      0,
160      {}},
161     {"KSC-EUC",
162      CIDSET_KOREA1,
163      CIDCODING_KOREA,
164      CPDF_CMap::MixedTwoBytes,
165      1,
166      {0xa1, 0xfe}},
167     {"KSCms-UHC",
168      CIDSET_KOREA1,
169      CIDCODING_KOREA,
170      CPDF_CMap::MixedTwoBytes,
171      1,
172      {0x81, 0xfe}},
173     {"KSCms-UHC-HW",
174      CIDSET_KOREA1,
175      CIDCODING_KOREA,
176      CPDF_CMap::MixedTwoBytes,
177      1,
178      {0x81, 0xfe}},
179     {"KSCpc-EUC",
180      CIDSET_KOREA1,
181      CIDCODING_KOREA,
182      CPDF_CMap::MixedTwoBytes,
183      1,
184      {0xa1, 0xfd}},
185     {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
186     {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
187 };
188 
CIDSetFromSizeT(size_t index)189 CIDSet CIDSetFromSizeT(size_t index) {
190   if (index >= CIDSET_NUM_SETS) {
191     NOTREACHED();
192     return CIDSET_UNKNOWN;
193   }
194   return static_cast<CIDSet>(index);
195 }
196 
// Returns |word| without its first and last characters (the surrounding
// delimiters of a string token). Words of two characters or fewer produce an
// empty view.
CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
  const FX_STRSIZE length = word.GetLength();
  if (length > 2)
    return CFX_ByteStringC(&word[1], length - 2);
  return CFX_ByteStringC();
}
202 
// qsort()-style comparator ordering records by their leading uint32_t.
// Returns a negative, zero or positive value when the first key is less than,
// equal to or greater than the second.
//
// The keys are compared explicitly rather than subtracted: an unsigned
// difference converted to int has the wrong sign whenever the two keys are
// more than INT_MAX apart.
int CompareDWORD(const void* data1, const void* data2) {
  const uint32_t lhs = *static_cast<const uint32_t*>(data1);
  const uint32_t rhs = *static_cast<const uint32_t*>(data2);
  if (lhs < rhs)
    return -1;
  return lhs > rhs ? 1 : 0;
}
206 
// bsearch()-style comparator. |key| points at a character code and |element|
// at a two-word record: word 0 is the first code of a range and the upper
// 16 bits of word 1 hold the range length. Returns -1 when the code lies
// below the range, 1 when it lies above, and 0 when it falls inside.
int CompareCID(const void* key, const void* element) {
  const uint32_t code = *(uint32_t*)key;
  const uint32_t* record = (uint32_t*)element;
  const uint32_t first = record[0];
  const uint32_t last = first + record[1] / 65536;
  if (code < first)
    return -1;
  return code > last ? 1 : 0;
}
217 
CheckCodeRange(uint8_t * codes,int size,CMap_CodeRange * pRanges,int nRanges)218 int CheckCodeRange(uint8_t* codes,
219                    int size,
220                    CMap_CodeRange* pRanges,
221                    int nRanges) {
222   int iSeg = nRanges - 1;
223   while (iSeg >= 0) {
224     if (pRanges[iSeg].m_CharSize < size) {
225       --iSeg;
226       continue;
227     }
228     int iChar = 0;
229     while (iChar < size) {
230       if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] ||
231           codes[iChar] > pRanges[iSeg].m_Upper[iChar]) {
232         break;
233       }
234       ++iChar;
235     }
236     if (iChar == pRanges[iSeg].m_CharSize)
237       return 2;
238 
239     if (iChar)
240       return (size == pRanges[iSeg].m_CharSize) ? 2 : 1;
241     iSeg--;
242   }
243   return 0;
244 }
245 
GetCharSizeImpl(uint32_t charcode,CMap_CodeRange * pRanges,int iRangesSize)246 int GetCharSizeImpl(uint32_t charcode,
247                     CMap_CodeRange* pRanges,
248                     int iRangesSize) {
249   if (!iRangesSize)
250     return 1;
251 
252   uint8_t codes[4];
253   codes[0] = codes[1] = 0x00;
254   codes[2] = (uint8_t)(charcode >> 8 & 0xFF);
255   codes[3] = (uint8_t)charcode;
256   int offset = 0;
257   int size = 4;
258   for (int i = 0; i < 4; ++i) {
259     int iSeg = iRangesSize - 1;
260     while (iSeg >= 0) {
261       if (pRanges[iSeg].m_CharSize < size) {
262         --iSeg;
263         continue;
264       }
265       int iChar = 0;
266       while (iChar < size) {
267         if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] ||
268             codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) {
269           break;
270         }
271         ++iChar;
272       }
273       if (iChar == pRanges[iSeg].m_CharSize)
274         return size;
275       --iSeg;
276     }
277     --size;
278     ++offset;
279   }
280   return 1;
281 }
282 
283 }  // namespace
284 
CPDF_CMapManager()285 CPDF_CMapManager::CPDF_CMapManager() {}
286 
~CPDF_CMapManager()287 CPDF_CMapManager::~CPDF_CMapManager() {}
288 
GetPredefinedCMap(const CFX_ByteString & name,bool bPromptCJK)289 CFX_MaybeOwned<CPDF_CMap> CPDF_CMapManager::GetPredefinedCMap(
290     const CFX_ByteString& name,
291     bool bPromptCJK) {
292   auto it = m_CMaps.find(name);
293   if (it != m_CMaps.end())
294     return CFX_MaybeOwned<CPDF_CMap>(it->second.get());  // Unowned.
295 
296   std::unique_ptr<CPDF_CMap> pCMap = LoadPredefinedCMap(name, bPromptCJK);
297   if (name.IsEmpty())
298     return CFX_MaybeOwned<CPDF_CMap>(std::move(pCMap));  // Owned.
299 
300   CPDF_CMap* pUnowned = pCMap.get();
301   m_CMaps[name] = std::move(pCMap);
302   return CFX_MaybeOwned<CPDF_CMap>(pUnowned);  // Unowned.
303 }
304 
LoadPredefinedCMap(const CFX_ByteString & name,bool bPromptCJK)305 std::unique_ptr<CPDF_CMap> CPDF_CMapManager::LoadPredefinedCMap(
306     const CFX_ByteString& name,
307     bool bPromptCJK) {
308   auto pCMap = pdfium::MakeUnique<CPDF_CMap>();
309   const FX_CHAR* pname = name.c_str();
310   if (*pname == '/')
311     pname++;
312 
313   pCMap->LoadPredefined(this, pname, bPromptCJK);
314   return pCMap;
315 }
316 
GetCID2UnicodeMap(CIDSet charset,bool bPromptCJK)317 CPDF_CID2UnicodeMap* CPDF_CMapManager::GetCID2UnicodeMap(CIDSet charset,
318                                                          bool bPromptCJK) {
319   if (!m_CID2UnicodeMaps[charset])
320     m_CID2UnicodeMaps[charset] = LoadCID2UnicodeMap(charset, bPromptCJK);
321 
322   return m_CID2UnicodeMaps[charset].get();
323 }
324 
LoadCID2UnicodeMap(CIDSet charset,bool bPromptCJK)325 std::unique_ptr<CPDF_CID2UnicodeMap> CPDF_CMapManager::LoadCID2UnicodeMap(
326     CIDSet charset,
327     bool bPromptCJK) {
328   auto pMap = pdfium::MakeUnique<CPDF_CID2UnicodeMap>();
329   pMap->Load(this, charset, bPromptCJK);
330   return pMap;
331 }
332 
CPDF_CMapParser()333 CPDF_CMapParser::CPDF_CMapParser()
334     : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {}
335 
~CPDF_CMapParser()336 CPDF_CMapParser::~CPDF_CMapParser() {}
337 
Initialize(CPDF_CMap * pCMap)338 void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) {
339   m_pCMap = pCMap;
340   m_Status = 0;
341   m_CodeSeq = 0;
342   m_AddMaps.EstimateSize(0, 10240);
343 }
344 
ParseWord(const CFX_ByteStringC & word)345 void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
346   if (word.IsEmpty()) {
347     return;
348   }
349   if (word == "begincidchar") {
350     m_Status = 1;
351     m_CodeSeq = 0;
352   } else if (word == "begincidrange") {
353     m_Status = 2;
354     m_CodeSeq = 0;
355   } else if (word == "endcidrange" || word == "endcidchar") {
356     m_Status = 0;
357   } else if (word == "/WMode") {
358     m_Status = 6;
359   } else if (word == "/Registry") {
360     m_Status = 3;
361   } else if (word == "/Ordering") {
362     m_Status = 4;
363   } else if (word == "/Supplement") {
364     m_Status = 5;
365   } else if (word == "begincodespacerange") {
366     m_Status = 7;
367     m_CodeSeq = 0;
368   } else if (word == "usecmap") {
369   } else if (m_Status == 1 || m_Status == 2) {
370     m_CodePoints[m_CodeSeq] = CMap_GetCode(word);
371     m_CodeSeq++;
372     uint32_t StartCode, EndCode;
373     uint16_t StartCID;
374     if (m_Status == 1) {
375       if (m_CodeSeq < 2) {
376         return;
377       }
378       EndCode = StartCode = m_CodePoints[0];
379       StartCID = (uint16_t)m_CodePoints[1];
380     } else {
381       if (m_CodeSeq < 3) {
382         return;
383       }
384       StartCode = m_CodePoints[0];
385       EndCode = m_CodePoints[1];
386       StartCID = (uint16_t)m_CodePoints[2];
387     }
388     if (EndCode < 0x10000) {
389       for (uint32_t code = StartCode; code <= EndCode; code++) {
390         m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode);
391       }
392     } else {
393       uint32_t buf[2];
394       buf[0] = StartCode;
395       buf[1] = ((EndCode - StartCode) << 16) + StartCID;
396       m_AddMaps.AppendBlock(buf, sizeof buf);
397     }
398     m_CodeSeq = 0;
399   } else if (m_Status == 3) {
400     m_Status = 0;
401   } else if (m_Status == 4) {
402     m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word));
403     m_Status = 0;
404   } else if (m_Status == 5) {
405     m_Status = 0;
406   } else if (m_Status == 6) {
407     m_pCMap->m_bVertical = CMap_GetCode(word) != 0;
408     m_Status = 0;
409   } else if (m_Status == 7) {
410     if (word == "endcodespacerange") {
411       uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges);
412       if (nSegs > 1) {
413         m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
414         m_pCMap->m_nCodeRanges = nSegs;
415         FX_Free(m_pCMap->m_pLeadingBytes);
416         m_pCMap->m_pLeadingBytes =
417             FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange));
418         FXSYS_memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
419                      nSegs * sizeof(CMap_CodeRange));
420       } else if (nSegs == 1) {
421         m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
422                                       ? CPDF_CMap::TwoBytes
423                                       : CPDF_CMap::OneByte;
424       }
425       m_Status = 0;
426     } else {
427       if (word.GetLength() == 0 || word.GetAt(0) != '<') {
428         return;
429       }
430       if (m_CodeSeq % 2) {
431         CMap_CodeRange range;
432         if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word))
433           m_CodeRanges.push_back(range);
434       }
435       m_CodeSeq++;
436     }
437   }
438   m_LastWord = word;
439 }
440 
441 // Static.
CMap_GetCode(const CFX_ByteStringC & word)442 uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) {
443   pdfium::base::CheckedNumeric<uint32_t> num = 0;
444   if (word.GetAt(0) == '<') {
445     for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) {
446       num = num * 16 + FXSYS_toHexDigit(word.GetAt(i));
447       if (!num.IsValid())
448         return 0;
449     }
450     return num.ValueOrDie();
451   }
452 
453   for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) {
454     num = num * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(word.GetAt(i)));
455     if (!num.IsValid())
456       return 0;
457   }
458   return num.ValueOrDie();
459 }
460 
461 // Static.
CMap_GetCodeRange(CMap_CodeRange & range,const CFX_ByteStringC & first,const CFX_ByteStringC & second)462 bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range,
463                                         const CFX_ByteStringC& first,
464                                         const CFX_ByteStringC& second) {
465   if (first.GetLength() == 0 || first.GetAt(0) != '<')
466     return false;
467 
468   int i;
469   for (i = 1; i < first.GetLength(); ++i) {
470     if (first.GetAt(i) == '>') {
471       break;
472     }
473   }
474   range.m_CharSize = (i - 1) / 2;
475   if (range.m_CharSize > 4)
476     return false;
477 
478   for (i = 0; i < range.m_CharSize; ++i) {
479     uint8_t digit1 = first.GetAt(i * 2 + 1);
480     uint8_t digit2 = first.GetAt(i * 2 + 2);
481     range.m_Lower[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
482   }
483 
484   uint32_t size = second.GetLength();
485   for (i = 0; i < range.m_CharSize; ++i) {
486     uint8_t digit1 = ((uint32_t)i * 2 + 1 < size)
487                          ? second.GetAt((FX_STRSIZE)i * 2 + 1)
488                          : '0';
489     uint8_t digit2 = ((uint32_t)i * 2 + 2 < size)
490                          ? second.GetAt((FX_STRSIZE)i * 2 + 2)
491                          : '0';
492     range.m_Upper[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
493   }
494   return true;
495 }
496 
CPDF_CMap()497 CPDF_CMap::CPDF_CMap() {
498   m_Charset = CIDSET_UNKNOWN;
499   m_Coding = CIDCODING_UNKNOWN;
500   m_CodingScheme = TwoBytes;
501   m_bVertical = false;
502   m_bLoaded = false;
503   m_pMapping = nullptr;
504   m_pLeadingBytes = nullptr;
505   m_pAddMapping = nullptr;
506   m_pEmbedMap = nullptr;
507   m_nCodeRanges = 0;
508 }
~CPDF_CMap()509 CPDF_CMap::~CPDF_CMap() {
510   FX_Free(m_pMapping);
511   FX_Free(m_pAddMapping);
512   FX_Free(m_pLeadingBytes);
513 }
514 
IsLoaded() const515 bool CPDF_CMap::IsLoaded() const {
516   return m_bLoaded;
517 }
518 
IsVertWriting() const519 bool CPDF_CMap::IsVertWriting() const {
520   return m_bVertical;
521 }
522 
LoadPredefined(CPDF_CMapManager * pMgr,const CFX_ByteString & bsName,bool bPromptCJK)523 void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
524                                const CFX_ByteString& bsName,
525                                bool bPromptCJK) {
526   m_PredefinedCMap = bsName;
527   if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") {
528     m_Coding = CIDCODING_CID;
529     m_bVertical = bsName[9] == 'V';
530     m_bLoaded = true;
531     return;
532   }
533   CFX_ByteString cmapid = m_PredefinedCMap;
534   m_bVertical = cmapid.Right(1) == "V";
535   if (cmapid.GetLength() > 2) {
536     cmapid = cmapid.Left(cmapid.GetLength() - 2);
537   }
538   const CPDF_PredefinedCMap* map = nullptr;
539   for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) {
540     if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) {
541       map = &g_PredefinedCMaps[i];
542       break;
543     }
544   }
545   if (!map)
546     return;
547 
548   m_Charset = map->m_Charset;
549   m_Coding = map->m_Coding;
550   m_CodingScheme = map->m_CodingScheme;
551   if (m_CodingScheme == MixedTwoBytes) {
552     m_pLeadingBytes = FX_Alloc(uint8_t, 256);
553     for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) {
554       const uint8_t* segs = map->m_LeadingSegs;
555       for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) {
556         m_pLeadingBytes[b] = 1;
557       }
558     }
559   }
560   FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
561   if (!m_pEmbedMap)
562     return;
563 
564   m_bLoaded = true;
565 }
566 
LoadEmbedded(const uint8_t * pData,uint32_t size)567 void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
568   m_pMapping = FX_Alloc(uint16_t, 65536);
569   CPDF_CMapParser parser;
570   parser.Initialize(this);
571   CPDF_SimpleParser syntax(pData, size);
572   while (1) {
573     CFX_ByteStringC word = syntax.GetWord();
574     if (word.IsEmpty()) {
575       break;
576     }
577     parser.ParseWord(word);
578   }
579   if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) {
580     m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4);
581     *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8;
582     FXSYS_memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(),
583                  parser.m_AddMaps.GetSize());
584     FXSYS_qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8,
585                 CompareDWORD);
586   }
587 }
588 
CIDFromCharCode(uint32_t charcode) const589 uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
590   if (m_Coding == CIDCODING_CID) {
591     return (uint16_t)charcode;
592   }
593   if (m_pEmbedMap) {
594     return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode);
595   }
596   if (!m_pMapping) {
597     return (uint16_t)charcode;
598   }
599   if (charcode >> 16) {
600     if (m_pAddMapping) {
601       void* found = FXSYS_bsearch(&charcode, m_pAddMapping + 4,
602                                   *(uint32_t*)m_pAddMapping, 8, CompareCID);
603       if (!found)
604         return 0;
605       return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode -
606                         *(uint32_t*)found);
607     }
608     return 0;
609   }
610   return m_pMapping[charcode];
611 }
612 
GetNextChar(const FX_CHAR * pString,int nStrLen,int & offset) const613 uint32_t CPDF_CMap::GetNextChar(const FX_CHAR* pString,
614                                 int nStrLen,
615                                 int& offset) const {
616   switch (m_CodingScheme) {
617     case OneByte:
618       return ((uint8_t*)pString)[offset++];
619     case TwoBytes:
620       offset += 2;
621       return ((uint8_t*)pString)[offset - 2] * 256 +
622              ((uint8_t*)pString)[offset - 1];
623     case MixedTwoBytes: {
624       uint8_t byte1 = ((uint8_t*)pString)[offset++];
625       if (!m_pLeadingBytes[byte1]) {
626         return byte1;
627       }
628       uint8_t byte2 = ((uint8_t*)pString)[offset++];
629       return byte1 * 256 + byte2;
630     }
631     case MixedFourBytes: {
632       uint8_t codes[4];
633       int char_size = 1;
634       codes[0] = ((uint8_t*)pString)[offset++];
635       CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
636       while (1) {
637         int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
638         if (ret == 0) {
639           return 0;
640         }
641         if (ret == 2) {
642           uint32_t charcode = 0;
643           for (int i = 0; i < char_size; i++) {
644             charcode = (charcode << 8) + codes[i];
645           }
646           return charcode;
647         }
648         if (char_size == 4 || offset == nStrLen) {
649           return 0;
650         }
651         codes[char_size++] = ((uint8_t*)pString)[offset++];
652       }
653       break;
654     }
655   }
656   return 0;
657 }
GetCharSize(uint32_t charcode) const658 int CPDF_CMap::GetCharSize(uint32_t charcode) const {
659   switch (m_CodingScheme) {
660     case OneByte:
661       return 1;
662     case TwoBytes:
663       return 2;
664     case MixedTwoBytes:
665     case MixedFourBytes:
666       if (charcode < 0x100) {
667         return 1;
668       }
669       if (charcode < 0x10000) {
670         return 2;
671       }
672       if (charcode < 0x1000000) {
673         return 3;
674       }
675       return 4;
676   }
677   return 1;
678 }
CountChar(const FX_CHAR * pString,int size) const679 int CPDF_CMap::CountChar(const FX_CHAR* pString, int size) const {
680   switch (m_CodingScheme) {
681     case OneByte:
682       return size;
683     case TwoBytes:
684       return (size + 1) / 2;
685     case MixedTwoBytes: {
686       int count = 0;
687       for (int i = 0; i < size; i++) {
688         count++;
689         if (m_pLeadingBytes[((uint8_t*)pString)[i]]) {
690           i++;
691         }
692       }
693       return count;
694     }
695     case MixedFourBytes: {
696       int count = 0, offset = 0;
697       while (offset < size) {
698         GetNextChar(pString, size, offset);
699         count++;
700       }
701       return count;
702     }
703   }
704   return size;
705 }
706 
AppendChar(FX_CHAR * str,uint32_t charcode) const707 int CPDF_CMap::AppendChar(FX_CHAR* str, uint32_t charcode) const {
708   switch (m_CodingScheme) {
709     case OneByte:
710       str[0] = (uint8_t)charcode;
711       return 1;
712     case TwoBytes:
713       str[0] = (uint8_t)(charcode / 256);
714       str[1] = (uint8_t)(charcode % 256);
715       return 2;
716     case MixedTwoBytes:
717     case MixedFourBytes:
718       if (charcode < 0x100) {
719         CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
720         int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
721         if (iSize == 0) {
722           iSize = 1;
723         }
724         if (iSize > 1) {
725           FXSYS_memset(str, 0, sizeof(uint8_t) * iSize);
726         }
727         str[iSize - 1] = (uint8_t)charcode;
728         return iSize;
729       }
730       if (charcode < 0x10000) {
731         str[0] = (uint8_t)(charcode >> 8);
732         str[1] = (uint8_t)charcode;
733         return 2;
734       }
735       if (charcode < 0x1000000) {
736         str[0] = (uint8_t)(charcode >> 16);
737         str[1] = (uint8_t)(charcode >> 8);
738         str[2] = (uint8_t)charcode;
739         return 3;
740       }
741       str[0] = (uint8_t)(charcode >> 24);
742       str[1] = (uint8_t)(charcode >> 16);
743       str[2] = (uint8_t)(charcode >> 8);
744       str[3] = (uint8_t)charcode;
745       return 4;
746   }
747   return 0;
748 }
749 
CPDF_CID2UnicodeMap()750 CPDF_CID2UnicodeMap::CPDF_CID2UnicodeMap() {
751   m_EmbeddedCount = 0;
752 }
753 
~CPDF_CID2UnicodeMap()754 CPDF_CID2UnicodeMap::~CPDF_CID2UnicodeMap() {}
755 
IsLoaded()756 bool CPDF_CID2UnicodeMap::IsLoaded() {
757   return m_EmbeddedCount != 0;
758 }
759 
UnicodeFromCID(uint16_t CID)760 FX_WCHAR CPDF_CID2UnicodeMap::UnicodeFromCID(uint16_t CID) {
761   if (m_Charset == CIDSET_UNICODE) {
762     return CID;
763   }
764   if (CID < m_EmbeddedCount) {
765     return m_pEmbeddedMap[CID];
766   }
767   return 0;
768 }
769 
Load(CPDF_CMapManager * pMgr,CIDSet charset,bool bPromptCJK)770 void CPDF_CID2UnicodeMap::Load(CPDF_CMapManager* pMgr,
771                                CIDSet charset,
772                                bool bPromptCJK) {
773   m_Charset = charset;
774 
775   CPDF_FontGlobals* pFontGlobals =
776       CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals();
777   m_pEmbeddedMap = pFontGlobals->m_EmbeddedToUnicodes[charset].m_pMap;
778   m_EmbeddedCount = pFontGlobals->m_EmbeddedToUnicodes[charset].m_Count;
779 }
780 
CharsetFromOrdering(const CFX_ByteStringC & ordering)781 CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering) {
782   for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
783     if (ordering == g_CharsetNames[charset])
784       return CIDSetFromSizeT(charset);
785   }
786   return CIDSET_UNKNOWN;
787 }
788