1 // Copyright 2014 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_FX_CODEPAGE_H_ 8 #define CORE_FXCRT_FX_CODEPAGE_H_ 9 10 #include <stdint.h> 11 12 // Prove consistency with incomplete forward definitions. 13 #include "core/fxcrt/fx_codepage_forward.h" 14 #include "core/fxcrt/fx_string.h" 15 #include "third_party/base/span.h" 16 17 enum class FX_CodePage : uint16_t { 18 kDefANSI = 0, 19 kSymbol = 42, 20 kMSDOS_US = 437, 21 kArabic_ASMO708 = 708, 22 kMSDOS_Greek1 = 737, 23 kMSDOS_Baltic = 775, 24 kMSDOS_WesternEuropean = 850, 25 kMSDOS_EasternEuropean = 852, 26 kMSDOS_Cyrillic = 855, 27 kMSDOS_Turkish = 857, 28 kMSDOS_Portuguese = 860, 29 kMSDOS_Icelandic = 861, 30 kMSDOS_Hebrew = 862, 31 kMSDOS_FrenchCanadian = 863, 32 kMSDOS_Arabic = 864, 33 kMSDOS_Norwegian = 865, 34 kMSDOS_Russian = 866, 35 kMSDOS_Greek2 = 869, 36 kMSDOS_Thai = 874, 37 kShiftJIS = 932, 38 kChineseSimplified = 936, 39 kHangul = 949, 40 kChineseTraditional = 950, 41 kUTF16LE = 1200, 42 kUTF16BE = 1201, 43 kMSWin_EasternEuropean = 1250, 44 kMSWin_Cyrillic = 1251, 45 kMSWin_WesternEuropean = 1252, 46 kMSWin_Greek = 1253, 47 kMSWin_Turkish = 1254, 48 kMSWin_Hebrew = 1255, 49 kMSWin_Arabic = 1256, 50 kMSWin_Baltic = 1257, 51 kMSWin_Vietnamese = 1258, 52 kJohab = 1361, 53 kMAC_Roman = 10000, 54 kMAC_ShiftJIS = 10001, 55 kMAC_ChineseTraditional = 10002, 56 kMAC_Korean = 10003, 57 kMAC_Arabic = 10004, 58 kMAC_Hebrew = 10005, 59 kMAC_Greek = 10006, 60 kMAC_Cyrillic = 10007, 61 kMAC_ChineseSimplified = 10008, 62 kMAC_Thai = 10021, 63 kMAC_EasternEuropean = 10029, 64 kMAC_Turkish = 10081, 65 kUTF8 = 65001, 66 kFailure = 65535, 67 }; 68 69 enum class FX_Charset : uint8_t { 70 kANSI = 0, 71 kDefault = 1, 72 kSymbol = 2, 73 kMAC_Roman = 77, 74 kMAC_ShiftJIS = 78, 75 kMAC_Korean = 79, 76 kMAC_ChineseSimplified = 80, 77 kMAC_ChineseTraditional = 81, 78 kMAC_Hebrew = 83, 79 kMAC_Arabic = 84, 80 kMAC_Greek = 85, 81 kMAC_Turkish = 86, 82 kMAC_Thai = 87, 83 kMAC_EasternEuropean = 88, 84 kMAC_Cyrillic = 89, 85 kShiftJIS = 128, 86 kHangul = 129, 87 kJohab = 130, 88 kChineseSimplified = 134, 89 kChineseTraditional = 136, 90 kMSWin_Greek = 161, 91 kMSWin_Turkish = 162, 92 kMSWin_Vietnamese = 163, 93 kMSWin_Hebrew = 177, 94 kMSWin_Arabic = 178, 95 kMSWin_Baltic = 186, 96 kMSWin_Cyrillic = 204, 97 kThai = 222, 98 kMSWin_EasternEuropean = 238, 99 kUS = 254, 100 kOEM = 255, 101 }; 102 103 // Hi-bytes to unicode codepoint mapping for various code pages. 104 struct FX_CharsetUnicodes { 105 FX_Charset m_Charset; 106 const uint16_t* m_pUnicodes; // Raw, POD struct. 107 }; 108 109 extern const FX_CharsetUnicodes kFX_CharsetUnicodes[8]; 110 111 FX_CodePage FX_GetACP(); 112 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset); 113 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage); 114 FX_Charset FX_GetCharsetFromInt(int value); 115 bool FX_CharSetIsCJK(FX_Charset uCharset); 116 size_t FX_WideCharToMultiByte(FX_CodePage codepage, 117 WideStringView wstr, 118 pdfium::span<char> buf); 119 size_t FX_MultiByteToWideChar(FX_CodePage codepage, 120 ByteStringView bstr, 121 pdfium::span<wchar_t> buf); 122 123 #endif // CORE_FXCRT_FX_CODEPAGE_H_ 124