1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxge/cfx_folderfontinfo.h"
8
9 #include <array>
10 #include <iterator>
11 #include <limits>
12 #include <utility>
13
14 #include "build/build_config.h"
15 #include "core/fxcrt/byteorder.h"
16 #include "core/fxcrt/check_op.h"
17 #include "core/fxcrt/compiler_specific.h"
18 #include "core/fxcrt/containers/contains.h"
19 #include "core/fxcrt/fixed_size_data_vector.h"
20 #include "core/fxcrt/fx_codepage.h"
21 #include "core/fxcrt/fx_extension.h"
22 #include "core/fxcrt/fx_folder.h"
23 #include "core/fxcrt/fx_safe_types.h"
24 #include "core/fxcrt/fx_system.h"
25 #include "core/fxcrt/stl_util.h"
26 #include "core/fxge/cfx_fontmapper.h"
27 #include "core/fxge/fx_font.h"
28
29 namespace {
30
// One row of the Base-14 substitution table: a requested font name and the
// name of the font that is looked up in its place.
struct FontSubst {
  // Name as requested by the caller.
  const char* m_pName;

  // Name handed to GetFont() when `m_pName` is requested.
  const char* m_pSubstName;
};
35
36 constexpr auto kBase14Substs = fxcrt::ToArray<const FontSubst>({
37 {"Courier", "Courier New"},
38 {"Courier-Bold", "Courier New Bold"},
39 {"Courier-BoldOblique", "Courier New Bold Italic"},
40 {"Courier-Oblique", "Courier New Italic"},
41 {"Helvetica", "Arial"},
42 {"Helvetica-Bold", "Arial Bold"},
43 {"Helvetica-BoldOblique", "Arial Bold Italic"},
44 {"Helvetica-Oblique", "Arial Italic"},
45 {"Times-Roman", "Times New Roman"},
46 {"Times-Bold", "Times New Roman Bold"},
47 {"Times-BoldItalic", "Times New Roman Bold Italic"},
48 {"Times-Italic", "Times New Roman Italic"},
49 });
50
// Deleter for std::unique_ptr<FILE, FxFileCloser>: closes the stream with
// fclose() when the owning pointer is destroyed. A null stream is ignored.
struct FxFileCloser {
  inline void operator()(FILE* h) const {
    if (!h) {
      return;
    }
    fclose(h);
  }
};
58
FindFamilyNameMatch(ByteStringView family_name,const ByteString & installed_font_name)59 bool FindFamilyNameMatch(ByteStringView family_name,
60 const ByteString& installed_font_name) {
61 std::optional<size_t> result = installed_font_name.Find(family_name, 0);
62 if (!result.has_value())
63 return false;
64
65 size_t next_index = result.value() + family_name.GetLength();
66 // Rule out the case that |family_name| is a substring of
67 // |installed_font_name| but their family names are actually different words.
68 // For example: "Univers" and "Universal" are not a match because they have
69 // different family names, but "Univers" and "Univers Bold" are a match.
70 if (installed_font_name.IsValidIndex(next_index) &&
71 FXSYS_IsLowerASCII(installed_font_name[next_index])) {
72 return false;
73 }
74
75 return true;
76 }
77
ReadStringFromFile(FILE * pFile,uint32_t size)78 ByteString ReadStringFromFile(FILE* pFile, uint32_t size) {
79 ByteString result;
80 {
81 // Span's lifetime must end before ReleaseBuffer() below.
82 pdfium::span<char> buffer = result.GetBuffer(size);
83 if (!fread(buffer.data(), size, 1, pFile))
84 return ByteString();
85 }
86 result.ReleaseBuffer(size);
87 return result;
88 }
89
LoadTableFromTT(FILE * pFile,const uint8_t * pTables,uint32_t nTables,uint32_t tag,FX_FILESIZE fileSize)90 ByteString LoadTableFromTT(FILE* pFile,
91 const uint8_t* pTables,
92 uint32_t nTables,
93 uint32_t tag,
94 FX_FILESIZE fileSize) {
95 UNSAFE_TODO({
96 for (uint32_t i = 0; i < nTables; i++) {
97 // TODO(tsepez): use actual span.
98 auto p = pdfium::make_span(pTables + i * 16, 16u);
99 if (fxcrt::GetUInt32MSBFirst(p) == tag) {
100 uint32_t offset = fxcrt::GetUInt32MSBFirst(p.subspan(8));
101 uint32_t size = fxcrt::GetUInt32MSBFirst(p.subspan(12));
102 if (offset > std::numeric_limits<uint32_t>::max() - size ||
103 static_cast<FX_FILESIZE>(offset + size) > fileSize ||
104 fseek(pFile, offset, SEEK_SET) < 0) {
105 return ByteString();
106 }
107 return ReadStringFromFile(pFile, size);
108 }
109 }
110 });
111 return ByteString();
112 }
113
GetCharset(FX_Charset charset)114 uint32_t GetCharset(FX_Charset charset) {
115 switch (charset) {
116 case FX_Charset::kShiftJIS:
117 return CHARSET_FLAG_SHIFTJIS;
118 case FX_Charset::kChineseSimplified:
119 return CHARSET_FLAG_GB;
120 case FX_Charset::kChineseTraditional:
121 return CHARSET_FLAG_BIG5;
122 case FX_Charset::kHangul:
123 return CHARSET_FLAG_KOREAN;
124 case FX_Charset::kSymbol:
125 return CHARSET_FLAG_SYMBOL;
126 case FX_Charset::kANSI:
127 return CHARSET_FLAG_ANSI;
128 default:
129 break;
130 }
131 return 0;
132 }
133
134 } // namespace
135
136 CFX_FolderFontInfo::CFX_FolderFontInfo() = default;
137
138 CFX_FolderFontInfo::~CFX_FolderFontInfo() = default;
139
AddPath(const ByteString & path)140 void CFX_FolderFontInfo::AddPath(const ByteString& path) {
141 m_PathList.push_back(path);
142 }
143
EnumFontList(CFX_FontMapper * pMapper)144 bool CFX_FolderFontInfo::EnumFontList(CFX_FontMapper* pMapper) {
145 m_pMapper = pMapper;
146 for (const auto& path : m_PathList)
147 ScanPath(path);
148 return true;
149 }
150
ScanPath(const ByteString & path)151 void CFX_FolderFontInfo::ScanPath(const ByteString& path) {
152 std::unique_ptr<FX_Folder> handle = FX_Folder::OpenFolder(path);
153 if (!handle)
154 return;
155
156 ByteString filename;
157 bool bFolder;
158 while (handle->GetNextFile(&filename, &bFolder)) {
159 if (bFolder) {
160 if (filename == "." || filename == "..")
161 continue;
162 } else {
163 ByteString ext = filename.Last(4);
164 ext.MakeLower();
165 if (ext != ".ttf" && ext != ".ttc" && ext != ".otf")
166 continue;
167 }
168
169 ByteString fullpath = path;
170 #if BUILDFLAG(IS_WIN)
171 fullpath += "\\";
172 #else
173 fullpath += "/";
174 #endif
175
176 fullpath += filename;
177 bFolder ? ScanPath(fullpath) : ScanFile(fullpath);
178 }
179 }
180
ScanFile(const ByteString & path)181 void CFX_FolderFontInfo::ScanFile(const ByteString& path) {
182 std::unique_ptr<FILE, FxFileCloser> pFile(fopen(path.c_str(), "rb"));
183 if (!pFile)
184 return;
185
186 fseek(pFile.get(), 0, SEEK_END);
187
188 FX_FILESIZE filesize = ftell(pFile.get());
189 uint8_t buffer[16];
190 fseek(pFile.get(), 0, SEEK_SET);
191
192 size_t items_read = fread(buffer, /*size=*/12, /*nmemb=*/1, pFile.get());
193 if (items_read != 1) {
194 return;
195 }
196 uint32_t magic =
197 fxcrt::GetUInt32MSBFirst(pdfium::make_span(buffer).first<4u>());
198 if (magic != kTableTTCF) {
199 ReportFace(path, pFile.get(), filesize, 0);
200 return;
201 }
202
203 uint32_t nFaces =
204 fxcrt::GetUInt32MSBFirst(pdfium::make_span(buffer).subspan<8u>());
205 FX_SAFE_SIZE_T safe_face_bytes = nFaces;
206 safe_face_bytes *= 4;
207 if (!safe_face_bytes.IsValid())
208 return;
209
210 auto offsets =
211 FixedSizeDataVector<uint8_t>::Uninit(safe_face_bytes.ValueOrDie());
212 pdfium::span<uint8_t> offsets_span = offsets.span();
213 items_read = fread(offsets_span.data(), /*size=*/1,
214 /*nmemb=*/offsets_span.size(), pFile.get());
215 if (items_read != offsets_span.size()) {
216 return;
217 }
218
219 for (uint32_t i = 0; i < nFaces; i++) {
220 ReportFace(path, pFile.get(), filesize,
221 fxcrt::GetUInt32MSBFirst(offsets_span.subspan(i * 4)));
222 }
223 }
224
ReportFace(const ByteString & path,FILE * pFile,FX_FILESIZE filesize,uint32_t offset)225 void CFX_FolderFontInfo::ReportFace(const ByteString& path,
226 FILE* pFile,
227 FX_FILESIZE filesize,
228 uint32_t offset) {
229 char buffer[16];
230 if (fseek(pFile, offset, SEEK_SET) < 0 || !fread(buffer, 12, 1, pFile))
231 return;
232
233 uint32_t nTables =
234 fxcrt::GetUInt16MSBFirst(pdfium::as_byte_span(buffer).subspan<4, 2>());
235 ByteString tables = ReadStringFromFile(pFile, nTables * 16);
236 if (tables.IsEmpty())
237 return;
238
239 static constexpr uint32_t kNameTag =
240 CFX_FontMapper::MakeTag('n', 'a', 'm', 'e');
241 ByteString names = LoadTableFromTT(pFile, tables.unsigned_str(), nTables,
242 kNameTag, filesize);
243 if (names.IsEmpty())
244 return;
245
246 ByteString facename = GetNameFromTT(names.unsigned_span(), 1);
247 if (facename.IsEmpty())
248 return;
249
250 ByteString style = GetNameFromTT(names.unsigned_span(), 2);
251 if (style != "Regular")
252 facename += " " + style;
253
254 if (pdfium::Contains(m_FontList, facename))
255 return;
256
257 auto pInfo =
258 std::make_unique<FontFaceInfo>(path, facename, tables, offset, filesize);
259 static constexpr uint32_t kOs2Tag =
260 CFX_FontMapper::MakeTag('O', 'S', '/', '2');
261 ByteString os2 =
262 LoadTableFromTT(pFile, tables.unsigned_str(), nTables, kOs2Tag, filesize);
263 if (os2.GetLength() >= 86) {
264 pdfium::span<const uint8_t> p = os2.unsigned_span().subspan(78);
265 uint32_t codepages = fxcrt::GetUInt32MSBFirst(p);
266 if (codepages & (1U << 17)) {
267 m_pMapper->AddInstalledFont(facename, FX_Charset::kShiftJIS);
268 pInfo->m_Charsets |= CHARSET_FLAG_SHIFTJIS;
269 }
270 if (codepages & (1U << 18)) {
271 m_pMapper->AddInstalledFont(facename, FX_Charset::kChineseSimplified);
272 pInfo->m_Charsets |= CHARSET_FLAG_GB;
273 }
274 if (codepages & (1U << 20)) {
275 m_pMapper->AddInstalledFont(facename, FX_Charset::kChineseTraditional);
276 pInfo->m_Charsets |= CHARSET_FLAG_BIG5;
277 }
278 if ((codepages & (1U << 19)) || (codepages & (1U << 21))) {
279 m_pMapper->AddInstalledFont(facename, FX_Charset::kHangul);
280 pInfo->m_Charsets |= CHARSET_FLAG_KOREAN;
281 }
282 if (codepages & (1U << 31)) {
283 m_pMapper->AddInstalledFont(facename, FX_Charset::kSymbol);
284 pInfo->m_Charsets |= CHARSET_FLAG_SYMBOL;
285 }
286 }
287 m_pMapper->AddInstalledFont(facename, FX_Charset::kANSI);
288 pInfo->m_Charsets |= CHARSET_FLAG_ANSI;
289 pInfo->m_Styles = 0;
290 if (style.Contains("Bold"))
291 pInfo->m_Styles |= FXFONT_FORCE_BOLD;
292 if (style.Contains("Italic") || style.Contains("Oblique"))
293 pInfo->m_Styles |= FXFONT_ITALIC;
294 if (facename.Contains("Serif"))
295 pInfo->m_Styles |= FXFONT_SERIF;
296
297 m_FontList[facename] = std::move(pInfo);
298 }
299
GetSubstFont(const ByteString & face)300 void* CFX_FolderFontInfo::GetSubstFont(const ByteString& face) {
301 for (size_t iBaseFont = 0; iBaseFont < std::size(kBase14Substs);
302 iBaseFont++) {
303 if (face == kBase14Substs[iBaseFont].m_pName) {
304 return GetFont(kBase14Substs[iBaseFont].m_pSubstName);
305 }
306 }
307 return nullptr;
308 }
309
FindFont(int weight,bool bItalic,FX_Charset charset,int pitch_family,const ByteString & family,bool bMatchName)310 void* CFX_FolderFontInfo::FindFont(int weight,
311 bool bItalic,
312 FX_Charset charset,
313 int pitch_family,
314 const ByteString& family,
315 bool bMatchName) {
316 FontFaceInfo* pFind = nullptr;
317 uint32_t charset_flag = GetCharset(charset);
318
319 int32_t iBestSimilar = 0;
320 if (bMatchName) {
321 // Try a direct lookup for either a perfect score or to determine a
322 // baseline similarity score.
323 auto direct_it = m_FontList.find(family);
324 if (direct_it != m_FontList.end()) {
325 FontFaceInfo* pFont = direct_it->second.get();
326 if (pFont->IsEligibleForFindFont(charset_flag, charset)) {
327 iBestSimilar =
328 pFont->SimilarityScore(weight, bItalic, pitch_family, bMatchName);
329 if (iBestSimilar == FontFaceInfo::kSimilarityScoreMax) {
330 return pFont;
331 }
332 pFind = pFont;
333 }
334 }
335 }
336 // Try and find a better match. Since FindFamilyNameMatch() is expensive,
337 // avoid calling it unless there might be a better match.
338 ByteStringView bsFamily = family.AsStringView();
339 for (const auto& it : m_FontList) {
340 const ByteString& bsName = it.first;
341 FontFaceInfo* pFont = it.second.get();
342 if (!pFont->IsEligibleForFindFont(charset_flag, charset)) {
343 continue;
344 }
345 int32_t iSimilarValue = pFont->SimilarityScore(
346 weight, bItalic, pitch_family,
347 bMatchName && bsFamily.GetLength() == bsName.GetLength());
348 if (iSimilarValue > iBestSimilar) {
349 if (bMatchName && !FindFamilyNameMatch(bsFamily, bsName)) {
350 continue;
351 }
352 iBestSimilar = iSimilarValue;
353 pFind = pFont;
354 }
355 }
356
357 if (pFind) {
358 return pFind;
359 }
360
361 if (charset == FX_Charset::kANSI && FontFamilyIsFixedPitch(pitch_family)) {
362 auto* courier_new = GetFont("Courier New");
363 if (courier_new)
364 return courier_new;
365 }
366
367 return nullptr;
368 }
369
MapFont(int weight,bool bItalic,FX_Charset charset,int pitch_family,const ByteString & face)370 void* CFX_FolderFontInfo::MapFont(int weight,
371 bool bItalic,
372 FX_Charset charset,
373 int pitch_family,
374 const ByteString& face) {
375 return nullptr;
376 }
377
GetFont(const ByteString & face)378 void* CFX_FolderFontInfo::GetFont(const ByteString& face) {
379 auto it = m_FontList.find(face);
380 return it != m_FontList.end() ? it->second.get() : nullptr;
381 }
382
GetFontData(void * hFont,uint32_t table,pdfium::span<uint8_t> buffer)383 size_t CFX_FolderFontInfo::GetFontData(void* hFont,
384 uint32_t table,
385 pdfium::span<uint8_t> buffer) {
386 if (!hFont)
387 return 0;
388
389 const FontFaceInfo* pFont = static_cast<FontFaceInfo*>(hFont);
390 uint32_t datasize = 0;
391 uint32_t offset = 0;
392 if (table == 0) {
393 datasize = pFont->m_FontOffset ? 0 : pFont->m_FileSize;
394 } else if (table == kTableTTCF) {
395 datasize = pFont->m_FontOffset ? pFont->m_FileSize : 0;
396 } else {
397 size_t nTables = pFont->m_FontTables.GetLength() / 16;
398 for (size_t i = 0; i < nTables; i++) {
399 // TODO(tsepez): iterate over span.
400 pdfium::span<const uint8_t> p =
401 pFont->m_FontTables.unsigned_span().subspan(i * 16);
402 if (fxcrt::GetUInt32MSBFirst(p) == table) {
403 offset = fxcrt::GetUInt32MSBFirst(p.subspan(8));
404 datasize = fxcrt::GetUInt32MSBFirst(p.subspan(12));
405 }
406 }
407 }
408
409 if (!datasize || buffer.size() < datasize)
410 return datasize;
411
412 std::unique_ptr<FILE, FxFileCloser> pFile(
413 fopen(pFont->m_FilePath.c_str(), "rb"));
414 if (!pFile)
415 return 0;
416
417 if (fseek(pFile.get(), offset, SEEK_SET) < 0 ||
418 fread(buffer.data(), datasize, 1, pFile.get()) != 1) {
419 return 0;
420 }
421 return datasize;
422 }
423
DeleteFont(void * hFont)424 void CFX_FolderFontInfo::DeleteFont(void* hFont) {}
425
GetFaceName(void * hFont,ByteString * name)426 bool CFX_FolderFontInfo::GetFaceName(void* hFont, ByteString* name) {
427 if (!hFont)
428 return false;
429 *name = static_cast<FontFaceInfo*>(hFont)->m_FaceName;
430 return true;
431 }
432
GetFontCharset(void * hFont,FX_Charset * charset)433 bool CFX_FolderFontInfo::GetFontCharset(void* hFont, FX_Charset* charset) {
434 return false;
435 }
436
FontFaceInfo(ByteString filePath,ByteString faceName,ByteString fontTables,uint32_t fontOffset,uint32_t fileSize)437 CFX_FolderFontInfo::FontFaceInfo::FontFaceInfo(ByteString filePath,
438 ByteString faceName,
439 ByteString fontTables,
440 uint32_t fontOffset,
441 uint32_t fileSize)
442 : m_FilePath(filePath),
443 m_FaceName(faceName),
444 m_FontTables(fontTables),
445 m_FontOffset(fontOffset),
446 m_FileSize(fileSize) {}
447
IsEligibleForFindFont(uint32_t flag,FX_Charset charset) const448 bool CFX_FolderFontInfo::FontFaceInfo::IsEligibleForFindFont(
449 uint32_t flag,
450 FX_Charset charset) const {
451 return (m_Charsets & flag) || charset == FX_Charset::kDefault;
452 }
453
SimilarityScore(int weight,bool italic,int pitch_family,bool exact_match_bonus) const454 int32_t CFX_FolderFontInfo::FontFaceInfo::SimilarityScore(
455 int weight,
456 bool italic,
457 int pitch_family,
458 bool exact_match_bonus) const {
459 int32_t score = 0;
460 if (FontStyleIsForceBold(m_Styles) == (weight > 400)) {
461 score += 16;
462 }
463 if (FontStyleIsItalic(m_Styles) == italic) {
464 score += 16;
465 }
466 if (FontStyleIsSerif(m_Styles) == FontFamilyIsRoman(pitch_family)) {
467 score += 16;
468 }
469 if (FontStyleIsScript(m_Styles) == FontFamilyIsScript(pitch_family)) {
470 score += 8;
471 }
472 if (FontStyleIsFixedPitch(m_Styles) == FontFamilyIsFixedPitch(pitch_family)) {
473 score += 8;
474 }
475 if (exact_match_bonus) {
476 score += 4;
477 }
478 DCHECK_LE(score, kSimilarityScoreMax);
479 return score;
480 }
481