1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/font/font_int.h"
8
9 #include <memory>
10 #include <utility>
11
12 #include "core/fpdfapi/cmaps/cmap_int.h"
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/font/ttgsubtable.h"
15 #include "core/fpdfapi/page/cpdf_pagemodule.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
19 #include "core/fxcrt/fx_ext.h"
20 #include "core/fxge/fx_freetype.h"
21 #include "third_party/base/logging.h"
22 #include "third_party/base/stl_util.h"
23
24 namespace {
25
// Ordering names, one slot per CIDSet value (the array has CIDSET_NUM_SETS
// entries). Slot 0 carries no name; CharsetFromOrdering() starts its search
// at slot 1.
const FX_CHAR* const g_CharsetNames[CIDSET_NUM_SETS] = {
    nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
28
29 class CPDF_PredefinedCMap {
30 public:
31 const FX_CHAR* m_pName;
32 CIDSet m_Charset;
33 CIDCoding m_Coding;
34 CPDF_CMap::CodingScheme m_CodingScheme;
35 uint8_t m_LeadingSegCount;
36 uint8_t m_LeadingSegs[4];
37 };
38
// Table of predefined CMaps searched linearly by CPDF_CMap::LoadPredefined().
// Each row is: name, charset, coding, coding scheme, number of lead-byte
// ranges, and the inclusive {low, high} lead-byte ranges themselves.
// LoadPredefined() only consults the lead-byte ranges for MixedTwoBytes rows.
const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
    // CIDSET_GB1 entries.
    {"GB-EUC",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfe}},
    {"GBpc-EUC",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfc}},
    {"GBK-EUC",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"GBKp-EUC",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"GBK2K-EUC",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"GBK2K",
     CIDSET_GB1,
     CIDCODING_GB,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
    {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
    // CIDSET_CNS1 entries.
    {"B5pc",
     CIDSET_CNS1,
     CIDCODING_BIG5,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfc}},
    {"HKscs-B5",
     CIDSET_CNS1,
     CIDCODING_BIG5,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x88, 0xfe}},
    {"ETen-B5",
     CIDSET_CNS1,
     CIDCODING_BIG5,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfe}},
    {"ETenms-B5",
     CIDSET_CNS1,
     CIDCODING_BIG5,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfe}},
    {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
    {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
    // CIDSET_JAPAN1 entries.
    {"83pv-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"90ms-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"90msp-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"90pv-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"Add-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"EUC",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x8e, 0x8e, 0xa1, 0xfe}},
    {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
    {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
    {"Ext-RKSJ",
     CIDSET_JAPAN1,
     CIDCODING_JIS,
     CPDF_CMap::MixedTwoBytes,
     2,
     {0x81, 0x9f, 0xe0, 0xfc}},
    {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
    {"UniJIS-UCS2-HW",
     CIDSET_JAPAN1,
     CIDCODING_UCS2,
     CPDF_CMap::TwoBytes,
     0,
     {}},
    {"UniJIS-UTF16",
     CIDSET_JAPAN1,
     CIDCODING_UTF16,
     CPDF_CMap::TwoBytes,
     0,
     {}},
    // CIDSET_KOREA1 entries.
    {"KSC-EUC",
     CIDSET_KOREA1,
     CIDCODING_KOREA,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfe}},
    {"KSCms-UHC",
     CIDSET_KOREA1,
     CIDCODING_KOREA,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"KSCms-UHC-HW",
     CIDSET_KOREA1,
     CIDCODING_KOREA,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0x81, 0xfe}},
    {"KSCpc-EUC",
     CIDSET_KOREA1,
     CIDCODING_KOREA,
     CPDF_CMap::MixedTwoBytes,
     1,
     {0xa1, 0xfd}},
    {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
    {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
};
188
CIDSetFromSizeT(size_t index)189 CIDSet CIDSetFromSizeT(size_t index) {
190 if (index >= CIDSET_NUM_SETS) {
191 NOTREACHED();
192 return CIDSET_UNKNOWN;
193 }
194 return static_cast<CIDSet>(index);
195 }
196
// Returns |word| without its first and last character (the delimiters of a
// string token). Words of two characters or fewer yield an empty view.
CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
  const auto length = word.GetLength();
  if (length > 2)
    return CFX_ByteStringC(&word[1], length - 2);
  return CFX_ByteStringC();
}
202
// qsort()-style comparator ordering two entries by their leading uint32_t.
//
// Returns a negative value, zero, or a positive value when the first key is
// respectively less than, equal to, or greater than the second key.
//
// The keys are compared explicitly rather than subtracted: an unsigned
// difference converted to int has the wrong sign whenever the keys are
// 2^31 or more apart, which would leave the sorted table mis-ordered.
int CompareDWORD(const void* data1, const void* data2) {
  const uint32_t lhs = *static_cast<const uint32_t*>(data1);
  const uint32_t rhs = *static_cast<const uint32_t*>(data2);
  if (lhs < rhs)
    return -1;
  return lhs > rhs ? 1 : 0;
}
206
// bsearch()-style comparator. |key| points at a uint32_t character code and
// |element| at a two-word table entry: word 0 is the first code of a range
// and the upper 16 bits of word 1 are the distance to its last code.
// Returns -1 / 1 when the code lies below / above the range, otherwise 0.
int CompareCID(const void* key, const void* element) {
  const uint32_t code = *static_cast<const uint32_t*>(key);
  const uint32_t* entry = static_cast<const uint32_t*>(element);

  const uint32_t first_code = entry[0];
  if (code < first_code)
    return -1;

  const uint32_t last_code = first_code + (entry[1] >> 16);
  return code > last_code ? 1 : 0;
}
217
CheckCodeRange(uint8_t * codes,int size,CMap_CodeRange * pRanges,int nRanges)218 int CheckCodeRange(uint8_t* codes,
219 int size,
220 CMap_CodeRange* pRanges,
221 int nRanges) {
222 int iSeg = nRanges - 1;
223 while (iSeg >= 0) {
224 if (pRanges[iSeg].m_CharSize < size) {
225 --iSeg;
226 continue;
227 }
228 int iChar = 0;
229 while (iChar < size) {
230 if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] ||
231 codes[iChar] > pRanges[iSeg].m_Upper[iChar]) {
232 break;
233 }
234 ++iChar;
235 }
236 if (iChar == pRanges[iSeg].m_CharSize)
237 return 2;
238
239 if (iChar)
240 return (size == pRanges[iSeg].m_CharSize) ? 2 : 1;
241 iSeg--;
242 }
243 return 0;
244 }
245
GetCharSizeImpl(uint32_t charcode,CMap_CodeRange * pRanges,int iRangesSize)246 int GetCharSizeImpl(uint32_t charcode,
247 CMap_CodeRange* pRanges,
248 int iRangesSize) {
249 if (!iRangesSize)
250 return 1;
251
252 uint8_t codes[4];
253 codes[0] = codes[1] = 0x00;
254 codes[2] = (uint8_t)(charcode >> 8 & 0xFF);
255 codes[3] = (uint8_t)charcode;
256 int offset = 0;
257 int size = 4;
258 for (int i = 0; i < 4; ++i) {
259 int iSeg = iRangesSize - 1;
260 while (iSeg >= 0) {
261 if (pRanges[iSeg].m_CharSize < size) {
262 --iSeg;
263 continue;
264 }
265 int iChar = 0;
266 while (iChar < size) {
267 if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] ||
268 codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) {
269 break;
270 }
271 ++iChar;
272 }
273 if (iChar == pRanges[iSeg].m_CharSize)
274 return size;
275 --iSeg;
276 }
277 --size;
278 ++offset;
279 }
280 return 1;
281 }
282
283 } // namespace
284
CPDF_CMapManager()285 CPDF_CMapManager::CPDF_CMapManager() {}
286
~CPDF_CMapManager()287 CPDF_CMapManager::~CPDF_CMapManager() {}
288
GetPredefinedCMap(const CFX_ByteString & name,bool bPromptCJK)289 CFX_MaybeOwned<CPDF_CMap> CPDF_CMapManager::GetPredefinedCMap(
290 const CFX_ByteString& name,
291 bool bPromptCJK) {
292 auto it = m_CMaps.find(name);
293 if (it != m_CMaps.end())
294 return CFX_MaybeOwned<CPDF_CMap>(it->second.get()); // Unowned.
295
296 std::unique_ptr<CPDF_CMap> pCMap = LoadPredefinedCMap(name, bPromptCJK);
297 if (name.IsEmpty())
298 return CFX_MaybeOwned<CPDF_CMap>(std::move(pCMap)); // Owned.
299
300 CPDF_CMap* pUnowned = pCMap.get();
301 m_CMaps[name] = std::move(pCMap);
302 return CFX_MaybeOwned<CPDF_CMap>(pUnowned); // Unowned.
303 }
304
LoadPredefinedCMap(const CFX_ByteString & name,bool bPromptCJK)305 std::unique_ptr<CPDF_CMap> CPDF_CMapManager::LoadPredefinedCMap(
306 const CFX_ByteString& name,
307 bool bPromptCJK) {
308 auto pCMap = pdfium::MakeUnique<CPDF_CMap>();
309 const FX_CHAR* pname = name.c_str();
310 if (*pname == '/')
311 pname++;
312
313 pCMap->LoadPredefined(this, pname, bPromptCJK);
314 return pCMap;
315 }
316
GetCID2UnicodeMap(CIDSet charset,bool bPromptCJK)317 CPDF_CID2UnicodeMap* CPDF_CMapManager::GetCID2UnicodeMap(CIDSet charset,
318 bool bPromptCJK) {
319 if (!m_CID2UnicodeMaps[charset])
320 m_CID2UnicodeMaps[charset] = LoadCID2UnicodeMap(charset, bPromptCJK);
321
322 return m_CID2UnicodeMaps[charset].get();
323 }
324
LoadCID2UnicodeMap(CIDSet charset,bool bPromptCJK)325 std::unique_ptr<CPDF_CID2UnicodeMap> CPDF_CMapManager::LoadCID2UnicodeMap(
326 CIDSet charset,
327 bool bPromptCJK) {
328 auto pMap = pdfium::MakeUnique<CPDF_CID2UnicodeMap>();
329 pMap->Load(this, charset, bPromptCJK);
330 return pMap;
331 }
332
CPDF_CMapParser()333 CPDF_CMapParser::CPDF_CMapParser()
334 : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {}
335
~CPDF_CMapParser()336 CPDF_CMapParser::~CPDF_CMapParser() {}
337
Initialize(CPDF_CMap * pCMap)338 void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) {
339 m_pCMap = pCMap;
340 m_Status = 0;
341 m_CodeSeq = 0;
342 m_AddMaps.EstimateSize(0, 10240);
343 }
344
// Feeds one token of a CMap stream into the parser's state machine.
//
// |m_Status| tracks what the next token means:
//   0 - idle
//   1 - inside begincidchar ... endcidchar (pairs: code, CID)
//   2 - inside begincidrange ... endcidrange (triples: first, last, CID)
//   3 - value following /Registry (ignored)
//   4 - value following /Ordering (selects the charset)
//   5 - value following /Supplement (ignored)
//   6 - value following /WMode (non-zero means vertical writing)
//   7 - inside begincodespacerange ... endcodespacerange
//
// Empty words are ignored. Every non-empty word is remembered in
// |m_LastWord| unless one of the early returns below is taken.
void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
  if (word.IsEmpty()) {
    return;
  }
  if (word == "begincidchar") {
    m_Status = 1;
    m_CodeSeq = 0;
  } else if (word == "begincidrange") {
    m_Status = 2;
    m_CodeSeq = 0;
  } else if (word == "endcidrange" || word == "endcidchar") {
    m_Status = 0;
  } else if (word == "/WMode") {
    m_Status = 6;
  } else if (word == "/Registry") {
    m_Status = 3;
  } else if (word == "/Ordering") {
    m_Status = 4;
  } else if (word == "/Supplement") {
    m_Status = 5;
  } else if (word == "begincodespacerange") {
    m_Status = 7;
    m_CodeSeq = 0;
  } else if (word == "usecmap") {
    // Recognized but deliberately not acted upon here.
  } else if (m_Status == 1 || m_Status == 2) {
    // Collect operands until a full cidchar pair / cidrange triple is seen.
    m_CodePoints[m_CodeSeq] = CMap_GetCode(word);
    m_CodeSeq++;
    uint32_t StartCode, EndCode;
    uint16_t StartCID;
    if (m_Status == 1) {
      if (m_CodeSeq < 2) {
        return;
      }
      // A cidchar entry maps a single code, so the range has one element.
      EndCode = StartCode = m_CodePoints[0];
      StartCID = (uint16_t)m_CodePoints[1];
    } else {
      if (m_CodeSeq < 3) {
        return;
      }
      StartCode = m_CodePoints[0];
      EndCode = m_CodePoints[1];
      StartCID = (uint16_t)m_CodePoints[2];
    }
    if (EndCode < 0x10000) {
      // Codes below 0x10000 go straight into the direct lookup table.
      for (uint32_t code = StartCode; code <= EndCode; code++) {
        m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode);
      }
    } else {
      // Larger codes are recorded as two words: the first code, then the
      // range length in the upper 16 bits plus the starting CID.
      uint32_t buf[2];
      buf[0] = StartCode;
      buf[1] = ((EndCode - StartCode) << 16) + StartCID;
      m_AddMaps.AppendBlock(buf, sizeof buf);
    }
    m_CodeSeq = 0;
  } else if (m_Status == 3) {
    m_Status = 0;
  } else if (m_Status == 4) {
    m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word));
    m_Status = 0;
  } else if (m_Status == 5) {
    m_Status = 0;
  } else if (m_Status == 6) {
    m_pCMap->m_bVertical = CMap_GetCode(word) != 0;
    m_Status = 0;
  } else if (m_Status == 7) {
    if (word == "endcodespacerange") {
      uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges);
      if (nSegs > 1) {
        // Several code space ranges: keep a copy of all of them on the CMap
        // (stored in m_pLeadingBytes as raw CMap_CodeRange records).
        m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
        m_pCMap->m_nCodeRanges = nSegs;
        FX_Free(m_pCMap->m_pLeadingBytes);
        m_pCMap->m_pLeadingBytes =
            FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange));
        FXSYS_memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
                     nSegs * sizeof(CMap_CodeRange));
      } else if (nSegs == 1) {
        // A single range fixes the code width: two bytes or one byte.
        m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
                                      ? CPDF_CMap::TwoBytes
                                      : CPDF_CMap::OneByte;
      }
      m_Status = 0;
    } else {
      if (word.GetLength() == 0 || word.GetAt(0) != '<') {
        return;
      }
      // Every second hex token completes a pair with the preceding word.
      if (m_CodeSeq % 2) {
        CMap_CodeRange range;
        if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word))
          m_CodeRanges.push_back(range);
      }
      m_CodeSeq++;
    }
  }
  m_LastWord = word;
}
440
441 // Static.
CMap_GetCode(const CFX_ByteStringC & word)442 uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) {
443 pdfium::base::CheckedNumeric<uint32_t> num = 0;
444 if (word.GetAt(0) == '<') {
445 for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) {
446 num = num * 16 + FXSYS_toHexDigit(word.GetAt(i));
447 if (!num.IsValid())
448 return 0;
449 }
450 return num.ValueOrDie();
451 }
452
453 for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) {
454 num = num * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(word.GetAt(i)));
455 if (!num.IsValid())
456 return 0;
457 }
458 return num.ValueOrDie();
459 }
460
461 // Static.
CMap_GetCodeRange(CMap_CodeRange & range,const CFX_ByteStringC & first,const CFX_ByteStringC & second)462 bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range,
463 const CFX_ByteStringC& first,
464 const CFX_ByteStringC& second) {
465 if (first.GetLength() == 0 || first.GetAt(0) != '<')
466 return false;
467
468 int i;
469 for (i = 1; i < first.GetLength(); ++i) {
470 if (first.GetAt(i) == '>') {
471 break;
472 }
473 }
474 range.m_CharSize = (i - 1) / 2;
475 if (range.m_CharSize > 4)
476 return false;
477
478 for (i = 0; i < range.m_CharSize; ++i) {
479 uint8_t digit1 = first.GetAt(i * 2 + 1);
480 uint8_t digit2 = first.GetAt(i * 2 + 2);
481 range.m_Lower[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
482 }
483
484 uint32_t size = second.GetLength();
485 for (i = 0; i < range.m_CharSize; ++i) {
486 uint8_t digit1 = ((uint32_t)i * 2 + 1 < size)
487 ? second.GetAt((FX_STRSIZE)i * 2 + 1)
488 : '0';
489 uint8_t digit2 = ((uint32_t)i * 2 + 2 < size)
490 ? second.GetAt((FX_STRSIZE)i * 2 + 2)
491 : '0';
492 range.m_Upper[i] = FXSYS_toHexDigit(digit1) * 16 + FXSYS_toHexDigit(digit2);
493 }
494 return true;
495 }
496
CPDF_CMap()497 CPDF_CMap::CPDF_CMap() {
498 m_Charset = CIDSET_UNKNOWN;
499 m_Coding = CIDCODING_UNKNOWN;
500 m_CodingScheme = TwoBytes;
501 m_bVertical = false;
502 m_bLoaded = false;
503 m_pMapping = nullptr;
504 m_pLeadingBytes = nullptr;
505 m_pAddMapping = nullptr;
506 m_pEmbedMap = nullptr;
507 m_nCodeRanges = 0;
508 }
~CPDF_CMap()509 CPDF_CMap::~CPDF_CMap() {
510 FX_Free(m_pMapping);
511 FX_Free(m_pAddMapping);
512 FX_Free(m_pLeadingBytes);
513 }
514
// Reports whether LoadPredefined() completed successfully; that is the only
// place in this file where |m_bLoaded| is set to true.
bool CPDF_CMap::IsLoaded() const {
  return m_bLoaded;
}
518
// Reports the vertical-writing flag, which is set either from the CMap
// name's trailing 'V' or from a /WMode entry in an embedded CMap.
bool CPDF_CMap::IsVertWriting() const {
  return m_bVertical;
}
522
LoadPredefined(CPDF_CMapManager * pMgr,const CFX_ByteString & bsName,bool bPromptCJK)523 void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
524 const CFX_ByteString& bsName,
525 bool bPromptCJK) {
526 m_PredefinedCMap = bsName;
527 if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") {
528 m_Coding = CIDCODING_CID;
529 m_bVertical = bsName[9] == 'V';
530 m_bLoaded = true;
531 return;
532 }
533 CFX_ByteString cmapid = m_PredefinedCMap;
534 m_bVertical = cmapid.Right(1) == "V";
535 if (cmapid.GetLength() > 2) {
536 cmapid = cmapid.Left(cmapid.GetLength() - 2);
537 }
538 const CPDF_PredefinedCMap* map = nullptr;
539 for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) {
540 if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) {
541 map = &g_PredefinedCMaps[i];
542 break;
543 }
544 }
545 if (!map)
546 return;
547
548 m_Charset = map->m_Charset;
549 m_Coding = map->m_Coding;
550 m_CodingScheme = map->m_CodingScheme;
551 if (m_CodingScheme == MixedTwoBytes) {
552 m_pLeadingBytes = FX_Alloc(uint8_t, 256);
553 for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) {
554 const uint8_t* segs = map->m_LeadingSegs;
555 for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) {
556 m_pLeadingBytes[b] = 1;
557 }
558 }
559 }
560 FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
561 if (!m_pEmbedMap)
562 return;
563
564 m_bLoaded = true;
565 }
566
LoadEmbedded(const uint8_t * pData,uint32_t size)567 void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
568 m_pMapping = FX_Alloc(uint16_t, 65536);
569 CPDF_CMapParser parser;
570 parser.Initialize(this);
571 CPDF_SimpleParser syntax(pData, size);
572 while (1) {
573 CFX_ByteStringC word = syntax.GetWord();
574 if (word.IsEmpty()) {
575 break;
576 }
577 parser.ParseWord(word);
578 }
579 if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) {
580 m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4);
581 *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8;
582 FXSYS_memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(),
583 parser.m_AddMaps.GetSize());
584 FXSYS_qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8,
585 CompareDWORD);
586 }
587 }
588
CIDFromCharCode(uint32_t charcode) const589 uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
590 if (m_Coding == CIDCODING_CID) {
591 return (uint16_t)charcode;
592 }
593 if (m_pEmbedMap) {
594 return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode);
595 }
596 if (!m_pMapping) {
597 return (uint16_t)charcode;
598 }
599 if (charcode >> 16) {
600 if (m_pAddMapping) {
601 void* found = FXSYS_bsearch(&charcode, m_pAddMapping + 4,
602 *(uint32_t*)m_pAddMapping, 8, CompareCID);
603 if (!found)
604 return 0;
605 return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode -
606 *(uint32_t*)found);
607 }
608 return 0;
609 }
610 return m_pMapping[charcode];
611 }
612
GetNextChar(const FX_CHAR * pString,int nStrLen,int & offset) const613 uint32_t CPDF_CMap::GetNextChar(const FX_CHAR* pString,
614 int nStrLen,
615 int& offset) const {
616 switch (m_CodingScheme) {
617 case OneByte:
618 return ((uint8_t*)pString)[offset++];
619 case TwoBytes:
620 offset += 2;
621 return ((uint8_t*)pString)[offset - 2] * 256 +
622 ((uint8_t*)pString)[offset - 1];
623 case MixedTwoBytes: {
624 uint8_t byte1 = ((uint8_t*)pString)[offset++];
625 if (!m_pLeadingBytes[byte1]) {
626 return byte1;
627 }
628 uint8_t byte2 = ((uint8_t*)pString)[offset++];
629 return byte1 * 256 + byte2;
630 }
631 case MixedFourBytes: {
632 uint8_t codes[4];
633 int char_size = 1;
634 codes[0] = ((uint8_t*)pString)[offset++];
635 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
636 while (1) {
637 int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
638 if (ret == 0) {
639 return 0;
640 }
641 if (ret == 2) {
642 uint32_t charcode = 0;
643 for (int i = 0; i < char_size; i++) {
644 charcode = (charcode << 8) + codes[i];
645 }
646 return charcode;
647 }
648 if (char_size == 4 || offset == nStrLen) {
649 return 0;
650 }
651 codes[char_size++] = ((uint8_t*)pString)[offset++];
652 }
653 break;
654 }
655 }
656 return 0;
657 }
GetCharSize(uint32_t charcode) const658 int CPDF_CMap::GetCharSize(uint32_t charcode) const {
659 switch (m_CodingScheme) {
660 case OneByte:
661 return 1;
662 case TwoBytes:
663 return 2;
664 case MixedTwoBytes:
665 case MixedFourBytes:
666 if (charcode < 0x100) {
667 return 1;
668 }
669 if (charcode < 0x10000) {
670 return 2;
671 }
672 if (charcode < 0x1000000) {
673 return 3;
674 }
675 return 4;
676 }
677 return 1;
678 }
CountChar(const FX_CHAR * pString,int size) const679 int CPDF_CMap::CountChar(const FX_CHAR* pString, int size) const {
680 switch (m_CodingScheme) {
681 case OneByte:
682 return size;
683 case TwoBytes:
684 return (size + 1) / 2;
685 case MixedTwoBytes: {
686 int count = 0;
687 for (int i = 0; i < size; i++) {
688 count++;
689 if (m_pLeadingBytes[((uint8_t*)pString)[i]]) {
690 i++;
691 }
692 }
693 return count;
694 }
695 case MixedFourBytes: {
696 int count = 0, offset = 0;
697 while (offset < size) {
698 GetNextChar(pString, size, offset);
699 count++;
700 }
701 return count;
702 }
703 }
704 return size;
705 }
706
AppendChar(FX_CHAR * str,uint32_t charcode) const707 int CPDF_CMap::AppendChar(FX_CHAR* str, uint32_t charcode) const {
708 switch (m_CodingScheme) {
709 case OneByte:
710 str[0] = (uint8_t)charcode;
711 return 1;
712 case TwoBytes:
713 str[0] = (uint8_t)(charcode / 256);
714 str[1] = (uint8_t)(charcode % 256);
715 return 2;
716 case MixedTwoBytes:
717 case MixedFourBytes:
718 if (charcode < 0x100) {
719 CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes;
720 int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
721 if (iSize == 0) {
722 iSize = 1;
723 }
724 if (iSize > 1) {
725 FXSYS_memset(str, 0, sizeof(uint8_t) * iSize);
726 }
727 str[iSize - 1] = (uint8_t)charcode;
728 return iSize;
729 }
730 if (charcode < 0x10000) {
731 str[0] = (uint8_t)(charcode >> 8);
732 str[1] = (uint8_t)charcode;
733 return 2;
734 }
735 if (charcode < 0x1000000) {
736 str[0] = (uint8_t)(charcode >> 16);
737 str[1] = (uint8_t)(charcode >> 8);
738 str[2] = (uint8_t)charcode;
739 return 3;
740 }
741 str[0] = (uint8_t)(charcode >> 24);
742 str[1] = (uint8_t)(charcode >> 16);
743 str[2] = (uint8_t)(charcode >> 8);
744 str[3] = (uint8_t)charcode;
745 return 4;
746 }
747 return 0;
748 }
749
CPDF_CID2UnicodeMap()750 CPDF_CID2UnicodeMap::CPDF_CID2UnicodeMap() {
751 m_EmbeddedCount = 0;
752 }
753
~CPDF_CID2UnicodeMap()754 CPDF_CID2UnicodeMap::~CPDF_CID2UnicodeMap() {}
755
// Reports whether a non-empty embedded table has been attached by Load().
bool CPDF_CID2UnicodeMap::IsLoaded() {
  return m_EmbeddedCount != 0;
}
759
UnicodeFromCID(uint16_t CID)760 FX_WCHAR CPDF_CID2UnicodeMap::UnicodeFromCID(uint16_t CID) {
761 if (m_Charset == CIDSET_UNICODE) {
762 return CID;
763 }
764 if (CID < m_EmbeddedCount) {
765 return m_pEmbeddedMap[CID];
766 }
767 return 0;
768 }
769
Load(CPDF_CMapManager * pMgr,CIDSet charset,bool bPromptCJK)770 void CPDF_CID2UnicodeMap::Load(CPDF_CMapManager* pMgr,
771 CIDSet charset,
772 bool bPromptCJK) {
773 m_Charset = charset;
774
775 CPDF_FontGlobals* pFontGlobals =
776 CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals();
777 m_pEmbeddedMap = pFontGlobals->m_EmbeddedToUnicodes[charset].m_pMap;
778 m_EmbeddedCount = pFontGlobals->m_EmbeddedToUnicodes[charset].m_Count;
779 }
780
CharsetFromOrdering(const CFX_ByteStringC & ordering)781 CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering) {
782 for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
783 if (ordering == g_CharsetNames[charset])
784 return CIDSetFromSizeT(charset);
785 }
786 return CIDSET_UNKNOWN;
787 }
788