1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/font/cpdf_cmapparser.h"
8
9 #include <ctype.h>
10
11 #include <array>
12 #include <iterator>
13
14 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
15 #include "core/fpdfapi/parser/cpdf_array.h"
16 #include "core/fpdfapi/parser/cpdf_dictionary.h"
17 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
18 #include "core/fxcrt/check.h"
19 #include "core/fxcrt/fx_extension.h"
20 #include "core/fxcrt/fx_safe_types.h"
21
22 namespace {
23
CMap_GetString(ByteStringView word)24 ByteStringView CMap_GetString(ByteStringView word) {
25 if (word.GetLength() <= 2)
26 return ByteStringView();
27 return word.Last(word.GetLength() - 2);
28 }
29
30 } // namespace
31
CPDF_CMapParser(CPDF_CMap * pCMap)32 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap) : m_pCMap(pCMap) {}
33
~CPDF_CMapParser()34 CPDF_CMapParser::~CPDF_CMapParser() {
35 m_pCMap->SetAdditionalMappings(std::move(m_AdditionalCharcodeToCIDMappings));
36 m_pCMap->SetMixedFourByteLeadingRanges(std::move(m_Ranges));
37 }
38
ParseWord(ByteStringView word)39 void CPDF_CMapParser::ParseWord(ByteStringView word) {
40 DCHECK(!word.IsEmpty());
41
42 if (word == "begincidchar") {
43 m_Status = kProcessingCidChar;
44 m_CodeSeq = 0;
45 } else if (word == "begincidrange") {
46 m_Status = kProcessingCidRange;
47 m_CodeSeq = 0;
48 } else if (word == "endcidrange" || word == "endcidchar") {
49 m_Status = kStart;
50 } else if (word == "/WMode") {
51 m_Status = kProcessingWMode;
52 } else if (word == "/Registry") {
53 m_Status = kProcessingRegistry;
54 } else if (word == "/Ordering") {
55 m_Status = kProcessingOrdering;
56 } else if (word == "/Supplement") {
57 m_Status = kProcessingSupplement;
58 } else if (word == "begincodespacerange") {
59 m_Status = kProcessingCodeSpaceRange;
60 m_CodeSeq = 0;
61 } else if (word == "usecmap") {
62 } else if (m_Status == kProcessingCidChar) {
63 HandleCid(word);
64 } else if (m_Status == kProcessingCidRange) {
65 HandleCid(word);
66 } else if (m_Status == kProcessingRegistry) {
67 m_Status = kStart;
68 } else if (m_Status == kProcessingOrdering) {
69 m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
70 m_Status = kStart;
71 } else if (m_Status == kProcessingSupplement) {
72 m_Status = kStart;
73 } else if (m_Status == kProcessingWMode) {
74 m_pCMap->SetVertical(GetCode(word) != 0);
75 m_Status = kStart;
76 } else if (m_Status == kProcessingCodeSpaceRange) {
77 HandleCodeSpaceRange(word);
78 }
79 m_LastWord = word;
80 }
81
HandleCid(ByteStringView word)82 void CPDF_CMapParser::HandleCid(ByteStringView word) {
83 DCHECK(m_Status == kProcessingCidChar || m_Status == kProcessingCidRange);
84 bool bChar = m_Status == kProcessingCidChar;
85
86 m_CodePoints[m_CodeSeq] = GetCode(word);
87 m_CodeSeq++;
88 int nRequiredCodePoints = bChar ? 2 : 3;
89 if (m_CodeSeq < nRequiredCodePoints)
90 return;
91
92 uint32_t StartCode = m_CodePoints[0];
93 uint32_t EndCode;
94 uint16_t StartCID;
95 if (bChar) {
96 EndCode = StartCode;
97 StartCID = static_cast<uint16_t>(m_CodePoints[1]);
98 } else {
99 EndCode = m_CodePoints[1];
100 StartCID = static_cast<uint16_t>(m_CodePoints[2]);
101 }
102 if (EndCode < CPDF_CMap::kDirectMapTableSize) {
103 m_pCMap->SetDirectCharcodeToCIDTableRange(StartCode, EndCode, StartCID);
104 } else {
105 m_AdditionalCharcodeToCIDMappings.push_back({StartCode, EndCode, StartCID});
106 }
107 m_CodeSeq = 0;
108 }
109
HandleCodeSpaceRange(ByteStringView word)110 void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
111 if (word != "endcodespacerange") {
112 if (word.IsEmpty() || word[0] != '<')
113 return;
114
115 if (m_CodeSeq % 2) {
116 std::optional<CPDF_CMap::CodeRange> range =
117 GetCodeRange(m_LastWord.AsStringView(), word);
118 if (range.has_value())
119 m_PendingRanges.push_back(range.value());
120 }
121 m_CodeSeq++;
122 return;
123 }
124
125 size_t nSegs = m_Ranges.size() + m_PendingRanges.size();
126 if (nSegs == 1) {
127 const auto& first_range =
128 !m_Ranges.empty() ? m_Ranges[0] : m_PendingRanges[0];
129 m_pCMap->SetCodingScheme(first_range.m_CharSize == 2 ? CPDF_CMap::TwoBytes
130 : CPDF_CMap::OneByte);
131 } else if (nSegs > 1) {
132 m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
133 m_Ranges.reserve(nSegs);
134 std::move(m_PendingRanges.begin(), m_PendingRanges.end(),
135 std::back_inserter(m_Ranges));
136 m_PendingRanges.clear();
137 }
138 m_Status = kStart;
139 }
140
141 // static
GetCode(ByteStringView word)142 uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
143 if (word.IsEmpty())
144 return 0;
145
146 FX_SAFE_UINT32 num = 0;
147 if (word[0] == '<') {
148 for (size_t i = 1; i < word.GetLength() && isxdigit(word[i]); ++i) {
149 num = num * 16 + FXSYS_HexCharToInt(word[i]);
150 if (!num.IsValid())
151 return 0;
152 }
153 return num.ValueOrDie();
154 }
155
156 for (size_t i = 0; i < word.GetLength() && isdigit(word[i]); ++i) {
157 num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
158 if (!num.IsValid())
159 return 0;
160 }
161 return num.ValueOrDie();
162 }
163
164 // static
GetCodeRange(ByteStringView first,ByteStringView second)165 std::optional<CPDF_CMap::CodeRange> CPDF_CMapParser::GetCodeRange(
166 ByteStringView first,
167 ByteStringView second) {
168 if (first.IsEmpty() || first[0] != '<')
169 return std::nullopt;
170
171 size_t i;
172 for (i = 1; i < first.GetLength(); ++i) {
173 if (first[i] == '>')
174 break;
175 }
176 size_t char_size = (i - 1) / 2;
177 if (char_size > 4)
178 return std::nullopt;
179
180 CPDF_CMap::CodeRange range;
181 range.m_CharSize = char_size;
182 for (i = 0; i < range.m_CharSize; ++i) {
183 uint8_t digit1 = first[i * 2 + 1];
184 uint8_t digit2 = first[i * 2 + 2];
185 range.m_Lower[i] =
186 FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
187 }
188
189 size_t size = second.GetLength();
190 for (i = 0; i < range.m_CharSize; ++i) {
191 size_t i1 = i * 2 + 1;
192 size_t i2 = i1 + 1;
193 uint8_t digit1 = i1 < size ? second[i1] : '0';
194 uint8_t digit2 = i2 < size ? second[i2] : '0';
195 range.m_Upper[i] =
196 FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
197 }
198 return range;
199 }
200
201 // static
CharsetFromOrdering(ByteStringView ordering)202 CIDSet CPDF_CMapParser::CharsetFromOrdering(ByteStringView ordering) {
203 static const std::array<const char*, CIDSET_NUM_SETS> kCharsetNames = {
204 {nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"}};
205
206 for (size_t charset = 1; charset < std::size(kCharsetNames); ++charset) {
207 if (ordering == kCharsetNames[charset])
208 return static_cast<CIDSet>(charset);
209 }
210 return CIDSET_UNKNOWN;
211 }
212