1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "fpdfsdk/pwl/cpwl_font_map.h"
8
9 #include <utility>
10
11 #include "core/fpdfapi/cpdf_modulemgr.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fpdfapi/font/cpdf_fontencoding.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_parser.h"
16 #include "core/fpdfdoc/ipvt_fontmap.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "fpdfsdk/pwl/cpwl_wnd.h"
19 #include "third_party/base/ptr_util.h"
20 #include "third_party/base/stl_util.h"
21
namespace {

// Face name that CPWL_FontMap::Initialize() registers with the ANSI charset.
constexpr char kDefaultFontName[] = "Helvetica";

// Face names that CPWL_FontMap::IsStandardFont() recognizes. A name must
// match one of these exactly to be routed to AddStandardFont().
constexpr const char* g_sDEStandardFontName[] = {
    // Courier family.
    "Courier",
    "Courier-Bold",
    "Courier-BoldOblique",
    "Courier-Oblique",
    // Helvetica family.
    "Helvetica",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    "Helvetica-Oblique",
    // Times family.
    "Times-Roman",
    "Times-Bold",
    "Times-Italic",
    "Times-BoldItalic",
    // Symbolic fonts.
    "Symbol",
    "ZapfDingbats",
};

}  // namespace
42
// Stores the system handler that is later queried for native TrueType fonts
// (see GetNativeFont() and AddSystemFont()). |pSystemHandler| is expected to
// be non-null; this is checked with ASSERT.
CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler)
    : m_pSystemHandler(pSystemHandler) {
  ASSERT(m_pSystemHandler);
}
47
// Drops all font entries and cached native font names via Empty().
CPWL_FontMap::~CPWL_FontMap() {
  Empty();
}
51
GetDocument()52 CPDF_Document* CPWL_FontMap::GetDocument() {
53 if (!m_pPDFDoc) {
54 if (CPDF_ModuleMgr::Get()) {
55 m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
56 m_pPDFDoc->CreateNewDoc();
57 }
58 }
59 return m_pPDFDoc.get();
60 }
61
GetPDFFont(int32_t nFontIndex)62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
63 if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
64 return m_Data[nFontIndex]->pFont;
65
66 return nullptr;
67 }
68
GetPDFFontAlias(int32_t nFontIndex)69 ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
70 if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
71 return m_Data[nFontIndex]->sFontName;
72
73 return ByteString();
74 }
75
KnowWord(int32_t nFontIndex,uint16_t word)76 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
77 return pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex] &&
78 CharCodeFromUnicode(nFontIndex, word) >= 0;
79 }
80
GetWordFontIndex(uint16_t word,int32_t nCharset,int32_t nFontIndex)81 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
82 int32_t nCharset,
83 int32_t nFontIndex) {
84 if (nFontIndex > 0) {
85 if (KnowWord(nFontIndex, word))
86 return nFontIndex;
87 } else {
88 if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
89 if (nCharset == FX_CHARSET_Default ||
90 pData->nCharset == FX_CHARSET_Symbol || nCharset == pData->nCharset) {
91 if (KnowWord(0, word))
92 return 0;
93 }
94 }
95 }
96
97 int32_t nNewFontIndex =
98 GetFontIndex(GetNativeFontName(nCharset), nCharset, true);
99 if (nNewFontIndex >= 0) {
100 if (KnowWord(nNewFontIndex, word))
101 return nNewFontIndex;
102 }
103 nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false);
104 if (nNewFontIndex >= 0) {
105 if (KnowWord(nNewFontIndex, word))
106 return nNewFontIndex;
107 }
108 return -1;
109 }
110
CharCodeFromUnicode(int32_t nFontIndex,uint16_t word)111 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
112 if (!pdfium::IndexInBounds(m_Data, nFontIndex))
113 return -1;
114
115 CPWL_FontMap_Data* pData = m_Data[nFontIndex].get();
116 if (!pData || !pData->pFont)
117 return -1;
118
119 if (pData->pFont->IsUnicodeCompatible())
120 return pData->pFont->CharCodeFromUnicode(word);
121
122 return word < 0xFF ? word : -1;
123 }
124
GetNativeFontName(int32_t nCharset)125 ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
126 for (const auto& pData : m_NativeFont) {
127 if (pData && pData->nCharset == nCharset)
128 return pData->sFontName;
129 }
130
131 ByteString sNew = GetNativeFont(nCharset);
132 if (sNew.IsEmpty())
133 return ByteString();
134
135 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>();
136 pNewData->nCharset = nCharset;
137 pNewData->sFontName = sNew;
138 m_NativeFont.push_back(std::move(pNewData));
139 return sNew;
140 }
141
// Removes every font entry and every cached native font name. Indices handed
// out earlier by GetFontIndex()/AddFontData() no longer refer to an entry
// afterwards.
void CPWL_FontMap::Empty() {
  m_Data.clear();
  m_NativeFont.clear();
}
146
// Makes sure an entry for the default font (kDefaultFontName, ANSI charset)
// exists. When the map holds no entries beforehand, that entry is appended
// first and therefore receives index 0.
void CPWL_FontMap::Initialize() {
  GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false);
}
150
IsStandardFont(const ByteString & sFontName)151 bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) {
152 for (const char* name : g_sDEStandardFontName) {
153 if (sFontName == name)
154 return true;
155 }
156
157 return false;
158 }
159
FindFont(const ByteString & sFontName,int32_t nCharset)160 int32_t CPWL_FontMap::FindFont(const ByteString& sFontName, int32_t nCharset) {
161 int32_t i = 0;
162 for (const auto& pData : m_Data) {
163 if (pData &&
164 (nCharset == FX_CHARSET_Default || nCharset == pData->nCharset) &&
165 (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
166 return i;
167 }
168 ++i;
169 }
170 return -1;
171 }
172
GetFontIndex(const ByteString & sFontName,int32_t nCharset,bool bFind)173 int32_t CPWL_FontMap::GetFontIndex(const ByteString& sFontName,
174 int32_t nCharset,
175 bool bFind) {
176 int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
177 if (nFontIndex >= 0)
178 return nFontIndex;
179
180 ByteString sAlias;
181 CPDF_Font* pFont = bFind ? FindFontSameCharset(&sAlias, nCharset) : nullptr;
182 if (!pFont) {
183 ByteString sTemp = sFontName;
184 pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
185 sAlias = EncodeFontAlias(sTemp, nCharset);
186 }
187 AddedFont(pFont, sAlias);
188 return AddFontData(pFont, sAlias, nCharset);
189 }
190
// Called by GetFontIndex() (when |bFind| is set) to look for an already
// available font with charset |nCharset|. This implementation never finds
// one: it returns nullptr and leaves |sFontAlias| untouched.
// NOTE(review): presumably a hook meant to be overridden by subclasses —
// confirm against the class declaration.
CPDF_Font* CPWL_FontMap::FindFontSameCharset(ByteString* sFontAlias,
                                             int32_t nCharset) {
  return nullptr;
}
195
AddFontData(CPDF_Font * pFont,const ByteString & sFontAlias,int32_t nCharset)196 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
197 const ByteString& sFontAlias,
198 int32_t nCharset) {
199 auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>();
200 pNewData->pFont = pFont;
201 pNewData->sFontName = sFontAlias;
202 pNewData->nCharset = nCharset;
203 m_Data.push_back(std::move(pNewData));
204 return pdfium::CollectionSize<int32_t>(m_Data) - 1;
205 }
206
// Invoked by GetFontIndex() just before a new entry is recorded for |pFont|
// under |sFontAlias|. This implementation does nothing.
// NOTE(review): presumably a notification hook for subclasses — confirm
// against the class declaration.
void CPWL_FontMap::AddedFont(CPDF_Font* pFont, const ByteString& sFontAlias) {}
208
GetNativeFont(int32_t nCharset)209 ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
210 if (nCharset == FX_CHARSET_Default)
211 nCharset = GetNativeCharset();
212
213 ByteString sFontName = GetDefaultFontByCharset(nCharset);
214 if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
215 return ByteString();
216
217 return sFontName;
218 }
219
AddFontToDocument(CPDF_Document * pDoc,ByteString & sFontName,uint8_t nCharset)220 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
221 ByteString& sFontName,
222 uint8_t nCharset) {
223 if (IsStandardFont(sFontName))
224 return AddStandardFont(pDoc, sFontName);
225
226 return AddSystemFont(pDoc, sFontName, nCharset);
227 }
228
AddStandardFont(CPDF_Document * pDoc,ByteString & sFontName)229 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
230 ByteString& sFontName) {
231 if (!pDoc)
232 return nullptr;
233
234 CPDF_Font* pFont = nullptr;
235
236 if (sFontName == "ZapfDingbats") {
237 pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr);
238 } else {
239 CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
240 pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe);
241 }
242
243 return pFont;
244 }
245
AddSystemFont(CPDF_Document * pDoc,ByteString & sFontName,uint8_t nCharset)246 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
247 ByteString& sFontName,
248 uint8_t nCharset) {
249 if (!pDoc)
250 return nullptr;
251
252 if (sFontName.IsEmpty())
253 sFontName = GetNativeFont(nCharset);
254 if (nCharset == FX_CHARSET_Default)
255 nCharset = GetNativeCharset();
256
257 return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
258 nCharset);
259 }
260
// Builds the alias for |sFontName| and |nCharset|: the name with spaces
// removed, followed by '_' and the charset formatted with "%02X".
ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName,
                                         int32_t nCharset) {
  const ByteString sBaseAlias = EncodeFontAlias(sFontName);
  const ByteString sCharsetSuffix = ByteString::Format("_%02X", nCharset);
  return sBaseAlias + sCharsetSuffix;
}
265
// Returns a copy of |sFontName| with every space character removed.
ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName) {
  ByteString sWithoutSpaces(sFontName);
  sWithoutSpaces.Remove(' ');
  return sWithoutSpaces;
}
271
GetFontMapData(int32_t nIndex) const272 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
273 return pdfium::IndexInBounds(m_Data, nIndex) ? m_Data[nIndex].get() : nullptr;
274 }
275
GetNativeCharset()276 int32_t CPWL_FontMap::GetNativeCharset() {
277 uint8_t nCharset = FX_CHARSET_ANSI;
278 int32_t iCodePage = FXSYS_GetACP();
279 switch (iCodePage) {
280 case FX_CODEPAGE_ShiftJIS:
281 nCharset = FX_CHARSET_ShiftJIS;
282 break;
283 case FX_CODEPAGE_ChineseSimplified:
284 nCharset = FX_CHARSET_ChineseSimplified;
285 break;
286 case FX_CODEPAGE_ChineseTraditional:
287 nCharset = FX_CHARSET_ChineseTraditional;
288 break;
289 case FX_CODEPAGE_MSWin_WesternEuropean:
290 nCharset = FX_CHARSET_ANSI;
291 break;
292 case FX_CODEPAGE_MSDOS_Thai:
293 nCharset = FX_CHARSET_Thai;
294 break;
295 case FX_CODEPAGE_Hangul:
296 nCharset = FX_CHARSET_Hangul;
297 break;
298 case FX_CODEPAGE_UTF16LE:
299 nCharset = FX_CHARSET_ANSI;
300 break;
301 case FX_CODEPAGE_MSWin_EasternEuropean:
302 nCharset = FX_CHARSET_MSWin_EasternEuropean;
303 break;
304 case FX_CODEPAGE_MSWin_Cyrillic:
305 nCharset = FX_CHARSET_MSWin_Cyrillic;
306 break;
307 case FX_CODEPAGE_MSWin_Greek:
308 nCharset = FX_CHARSET_MSWin_Greek;
309 break;
310 case FX_CODEPAGE_MSWin_Turkish:
311 nCharset = FX_CHARSET_MSWin_Turkish;
312 break;
313 case FX_CODEPAGE_MSWin_Hebrew:
314 nCharset = FX_CHARSET_MSWin_Hebrew;
315 break;
316 case FX_CODEPAGE_MSWin_Arabic:
317 nCharset = FX_CHARSET_MSWin_Arabic;
318 break;
319 case FX_CODEPAGE_MSWin_Baltic:
320 nCharset = FX_CHARSET_MSWin_Baltic;
321 break;
322 case FX_CODEPAGE_MSWin_Vietnamese:
323 nCharset = FX_CHARSET_MSWin_Vietnamese;
324 break;
325 case FX_CODEPAGE_Johab:
326 nCharset = FX_CHARSET_Johab;
327 break;
328 }
329 return nCharset;
330 }
331
// Charset -> default font face name table scanned by
// GetDefaultFontByCharset(). The final {-1, nullptr} entry is the sentinel
// that terminates the scan and must stay last.
const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
    {FX_CHARSET_ANSI, "Helvetica"},
    {FX_CHARSET_ChineseSimplified, "SimSun"},
    {FX_CHARSET_ChineseTraditional, "MingLiU"},
    {FX_CHARSET_ShiftJIS, "MS Gothic"},
    {FX_CHARSET_Hangul, "Batang"},
    {FX_CHARSET_MSWin_Cyrillic, "Arial"},
// Linux and Apple builds use Arial for Eastern European text; all other
// platforms use Tahoma.
#if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
    {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
#else
    {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
#endif
    {FX_CHARSET_MSWin_Arabic, "Arial"},
    {-1, nullptr}};
346
GetDefaultFontByCharset(int32_t nCharset)347 ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
348 int i = 0;
349 while (defaultTTFMap[i].charset != -1) {
350 if (nCharset == defaultTTFMap[i].charset)
351 return defaultTTFMap[i].fontname;
352 ++i;
353 }
354 return "";
355 }
356
CharSetFromUnicode(uint16_t word,int32_t nOldCharset)357 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
358 // to avoid CJK Font to show ASCII
359 if (word < 0x7F)
360 return FX_CHARSET_ANSI;
361 // follow the old charset
362 if (nOldCharset != FX_CHARSET_Default)
363 return nOldCharset;
364
365 // find new charset
366 if ((word >= 0x4E00 && word <= 0x9FA5) ||
367 (word >= 0xE7C7 && word <= 0xE7F3) ||
368 (word >= 0x3000 && word <= 0x303F) ||
369 (word >= 0x2000 && word <= 0x206F)) {
370 return FX_CHARSET_ChineseSimplified;
371 }
372
373 if (((word >= 0x3040) && (word <= 0x309F)) ||
374 ((word >= 0x30A0) && (word <= 0x30FF)) ||
375 ((word >= 0x31F0) && (word <= 0x31FF)) ||
376 ((word >= 0xFF00) && (word <= 0xFFEF))) {
377 return FX_CHARSET_ShiftJIS;
378 }
379
380 if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
381 ((word >= 0x1100) && (word <= 0x11FF)) ||
382 ((word >= 0x3130) && (word <= 0x318F))) {
383 return FX_CHARSET_Hangul;
384 }
385
386 if (word >= 0x0E00 && word <= 0x0E7F)
387 return FX_CHARSET_Thai;
388
389 if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
390 return FX_CHARSET_MSWin_Greek;
391
392 if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
393 return FX_CHARSET_MSWin_Arabic;
394
395 if (word >= 0x0590 && word <= 0x05FF)
396 return FX_CHARSET_MSWin_Hebrew;
397
398 if (word >= 0x0400 && word <= 0x04FF)
399 return FX_CHARSET_MSWin_Cyrillic;
400
401 if (word >= 0x0100 && word <= 0x024F)
402 return FX_CHARSET_MSWin_EasternEuropean;
403
404 if (word >= 0x1E00 && word <= 0x1EFF)
405 return FX_CHARSET_MSWin_Vietnamese;
406
407 return FX_CHARSET_ANSI;
408 }
409