1 // Copyright 2014 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_FX_CODEPAGE_H_ 8 #define CORE_FXCRT_FX_CODEPAGE_H_ 9 10 #include <stdint.h> 11 12 #include <array> 13 14 // Prove consistency with incomplete forward definitions. 15 #include "core/fxcrt/fx_codepage_forward.h" 16 #include "core/fxcrt/fx_string.h" 17 #include "core/fxcrt/span.h" 18 19 enum class FX_CodePage : uint16_t { 20 kDefANSI = 0, 21 kSymbol = 42, 22 kMSDOS_US = 437, 23 kArabic_ASMO708 = 708, 24 kMSDOS_Greek1 = 737, 25 kMSDOS_Baltic = 775, 26 kMSDOS_WesternEuropean = 850, 27 kMSDOS_EasternEuropean = 852, 28 kMSDOS_Cyrillic = 855, 29 kMSDOS_Turkish = 857, 30 kMSDOS_Portuguese = 860, 31 kMSDOS_Icelandic = 861, 32 kMSDOS_Hebrew = 862, 33 kMSDOS_FrenchCanadian = 863, 34 kMSDOS_Arabic = 864, 35 kMSDOS_Norwegian = 865, 36 kMSDOS_Russian = 866, 37 kMSDOS_Greek2 = 869, 38 kMSDOS_Thai = 874, 39 kShiftJIS = 932, 40 kChineseSimplified = 936, 41 kHangul = 949, 42 kChineseTraditional = 950, 43 kUTF16LE = 1200, 44 kUTF16BE = 1201, 45 kMSWin_EasternEuropean = 1250, 46 kMSWin_Cyrillic = 1251, 47 kMSWin_WesternEuropean = 1252, 48 kMSWin_Greek = 1253, 49 kMSWin_Turkish = 1254, 50 kMSWin_Hebrew = 1255, 51 kMSWin_Arabic = 1256, 52 kMSWin_Baltic = 1257, 53 kMSWin_Vietnamese = 1258, 54 kJohab = 1361, 55 kMAC_Roman = 10000, 56 kMAC_ShiftJIS = 10001, 57 kMAC_ChineseTraditional = 10002, 58 kMAC_Korean = 10003, 59 kMAC_Arabic = 10004, 60 kMAC_Hebrew = 10005, 61 kMAC_Greek = 10006, 62 kMAC_Cyrillic = 10007, 63 kMAC_ChineseSimplified = 10008, 64 kMAC_Thai = 10021, 65 kMAC_EasternEuropean = 10029, 66 kMAC_Turkish = 10081, 67 kUTF8 = 65001, 68 kFailure = 65535, 69 }; 70 71 enum class FX_Charset : uint8_t { 72 kANSI = 0, 73 kDefault = 1, 74 kSymbol = 2, 75 kMAC_Roman = 77, 76 kMAC_ShiftJIS = 78, 77 kMAC_Korean = 79, 78 kMAC_ChineseSimplified = 80, 79 kMAC_ChineseTraditional = 81, 80 kMAC_Hebrew = 83, 81 kMAC_Arabic = 84, 82 kMAC_Greek = 85, 83 kMAC_Turkish = 86, 84 kMAC_Thai = 87, 85 kMAC_EasternEuropean = 88, 86 kMAC_Cyrillic = 89, 87 kShiftJIS = 128, 88 kHangul = 129, 89 kJohab = 130, 90 kChineseSimplified = 134, 91 kChineseTraditional = 136, 92 kMSWin_Greek = 161, 93 kMSWin_Turkish = 162, 94 kMSWin_Vietnamese = 163, 95 kMSWin_Hebrew = 177, 96 kMSWin_Arabic = 178, 97 kMSWin_Baltic = 186, 98 kMSWin_Cyrillic = 204, 99 kThai = 222, 100 kMSWin_EasternEuropean = 238, 101 kUS = 254, 102 kOEM = 255, 103 }; 104 105 // Hi-bytes to unicode codepoint mapping for various code pages. 106 struct FX_CharsetUnicodes { 107 FX_Charset m_Charset; 108 pdfium::span<const uint16_t> m_pUnicodes; 109 }; 110 111 extern const std::array<FX_CharsetUnicodes, 8> kFX_CharsetUnicodes; 112 113 FX_CodePage FX_GetACP(); 114 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset); 115 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage); 116 FX_Charset FX_GetCharsetFromInt(int value); 117 bool FX_CharSetIsCJK(FX_Charset uCharset); 118 size_t FX_WideCharToMultiByte(FX_CodePage codepage, 119 WideStringView wstr, 120 pdfium::span<char> buf); 121 size_t FX_MultiByteToWideChar(FX_CodePage codepage, 122 ByteStringView bstr, 123 pdfium::span<wchar_t> buf); 124 125 #endif // CORE_FXCRT_FX_CODEPAGE_H_ 126