• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_FX_CODEPAGE_H_
8 #define CORE_FXCRT_FX_CODEPAGE_H_
9 
10 #include <stdint.h>
11 
12 #include <array>
13 
14 // Prove consistency with incomplete forward definitions.
15 #include "core/fxcrt/fx_codepage_forward.h"
16 #include "core/fxcrt/fx_string.h"
17 #include "core/fxcrt/span.h"
18 
// Code page identifiers, stored as 16-bit unsigned values. Enumerators are
// grouped by name prefix (kMSDOS_*, kMSWin_*, kMAC_*), alongside the CJK,
// UTF-16 and UTF-8 entries.
// NOTE(review): the numeric values look like the Windows code page
// identifiers (e.g. 1252, 65001) -- confirm before relying on that.
19 enum class FX_CodePage : uint16_t {
20   kDefANSI = 0,
21   kSymbol = 42,
22   kMSDOS_US = 437,
23   kArabic_ASMO708 = 708,
24   kMSDOS_Greek1 = 737,
25   kMSDOS_Baltic = 775,
26   kMSDOS_WesternEuropean = 850,
27   kMSDOS_EasternEuropean = 852,
28   kMSDOS_Cyrillic = 855,
29   kMSDOS_Turkish = 857,
30   kMSDOS_Portuguese = 860,
31   kMSDOS_Icelandic = 861,
32   kMSDOS_Hebrew = 862,
33   kMSDOS_FrenchCanadian = 863,
34   kMSDOS_Arabic = 864,
35   kMSDOS_Norwegian = 865,
36   kMSDOS_Russian = 866,
37   kMSDOS_Greek2 = 869,
38   kMSDOS_Thai = 874,
39   kShiftJIS = 932,
40   kChineseSimplified = 936,
41   kHangul = 949,
42   kChineseTraditional = 950,
43   kUTF16LE = 1200,
44   kUTF16BE = 1201,
45   kMSWin_EasternEuropean = 1250,
46   kMSWin_Cyrillic = 1251,
47   kMSWin_WesternEuropean = 1252,
48   kMSWin_Greek = 1253,
49   kMSWin_Turkish = 1254,
50   kMSWin_Hebrew = 1255,
51   kMSWin_Arabic = 1256,
52   kMSWin_Baltic = 1257,
53   kMSWin_Vietnamese = 1258,
54   kJohab = 1361,
55   kMAC_Roman = 10000,
56   kMAC_ShiftJIS = 10001,
57   kMAC_ChineseTraditional = 10002,
58   kMAC_Korean = 10003,
59   kMAC_Arabic = 10004,
60   kMAC_Hebrew = 10005,
61   kMAC_Greek = 10006,
62   kMAC_Cyrillic = 10007,
63   kMAC_ChineseSimplified = 10008,
64   kMAC_Thai = 10021,
65   kMAC_EasternEuropean = 10029,
66   kMAC_Turkish = 10081,
67   kUTF8 = 65001,
     // Largest value representable in uint16_t. NOTE(review): presumably a
     // sentinel meaning "no valid code page" -- confirm against callers.
68   kFailure = 65535,
69 };
70 
// Character set identifiers, stored as 8-bit unsigned values. Several
// enumerators share names with FX_CodePage entries (e.g. kShiftJIS,
// kMSWin_Greek) but carry different numeric values, so the two enums are not
// interchangeable by cast.
// NOTE(review): the values appear to follow the Windows GDI *_CHARSET
// constants (0 = ANSI, 1 = default, 2 = symbol, 128 = Shift-JIS, ...) --
// confirm before relying on that.
71 enum class FX_Charset : uint8_t {
72   kANSI = 0,
73   kDefault = 1,
74   kSymbol = 2,
75   kMAC_Roman = 77,
76   kMAC_ShiftJIS = 78,
77   kMAC_Korean = 79,
78   kMAC_ChineseSimplified = 80,
79   kMAC_ChineseTraditional = 81,
80   kMAC_Hebrew = 83,
81   kMAC_Arabic = 84,
82   kMAC_Greek = 85,
83   kMAC_Turkish = 86,
84   kMAC_Thai = 87,
85   kMAC_EasternEuropean = 88,
86   kMAC_Cyrillic = 89,
87   kShiftJIS = 128,
88   kHangul = 129,
89   kJohab = 130,
90   kChineseSimplified = 134,
91   kChineseTraditional = 136,
92   kMSWin_Greek = 161,
93   kMSWin_Turkish = 162,
94   kMSWin_Vietnamese = 163,
95   kMSWin_Hebrew = 177,
96   kMSWin_Arabic = 178,
97   kMSWin_Baltic = 186,
98   kMSWin_Cyrillic = 204,
99   kThai = 222,
100   kMSWin_EasternEuropean = 238,
101   kUS = 254,
102   kOEM = 255,
103 };
104 
105 // Hi-bytes to Unicode code point mapping for various code pages.
106 struct FX_CharsetUnicodes {
      // Charset that the table below belongs to.
107   FX_Charset m_Charset;
      // Read-only span of 16-bit Unicode values for this charset.
      // NOTE(review): how a hi-byte is turned into an index into this span is
      // defined by the users of the table, not visible here -- confirm.
108   pdfium::span<const uint16_t> m_pUnicodes;
109 };
110 
// Constant table of 8 FX_CharsetUnicodes entries. Only declared here; the
// definition (and therefore which charsets are covered) lives in another
// translation unit.
111 extern const std::array<FX_CharsetUnicodes, 8> kFX_CharsetUnicodes;
112 
// Returns a code page value. NOTE(review): the name suggests this is the
// platform's active ANSI code page -- confirm against the implementation.
113 FX_CodePage FX_GetACP();
// Converts a charset identifier to a code page identifier.
// NOTE(review): behavior for charsets without a matching code page is not
// visible here (FX_CodePage::kFailure is a likely candidate) -- confirm.
114 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset);
// Converts a code page identifier to a charset identifier.
// NOTE(review): the fallback for unmapped code pages is not visible here --
// confirm.
115 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage);
// Produces an FX_Charset from a plain integer.
// NOTE(review): handling of integers that do not correspond to any
// enumerator is defined in the implementation -- confirm.
116 FX_Charset FX_GetCharsetFromInt(int value);
// Reports whether `uCharset` is treated as a CJK charset.
// NOTE(review): the exact set of charsets counted as CJK is defined in the
// implementation -- confirm.
117 bool FX_CharSetIsCJK(FX_Charset uCharset);
// Converts the wide string `wstr` to multi-byte text for `codepage`, using
// the caller-provided span `buf` as the output buffer.
// NOTE(review): the meaning of the returned size_t (bytes written vs. bytes
// required) and the behavior when `buf` is empty or too small are defined in
// the implementation -- confirm.
118 size_t FX_WideCharToMultiByte(FX_CodePage codepage,
119                               WideStringView wstr,
120                               pdfium::span<char> buf);
// Converts the multi-byte string `bstr`, interpreted in `codepage`, to wide
// characters, using the caller-provided span `buf` as the output buffer.
// NOTE(review): the meaning of the returned size_t (characters written vs.
// characters required) and the behavior when `buf` is empty or too small are
// defined in the implementation -- confirm.
121 size_t FX_MultiByteToWideChar(FX_CodePage codepage,
122                               ByteStringView bstr,
123                               pdfium::span<wchar_t> buf);
124 
125 #endif  // CORE_FXCRT_FX_CODEPAGE_H_
126