1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8 * Copyright 2006-2007 Jeremias Maerki.
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
24
25 #include <algorithm>
26 #include <array>
27 #include <limits>
28 #include <memory>
29 #include <vector>
30
31 #include "core/fxcrt/fx_extension.h"
32 #include "fxbarcode/common/BC_CommonBitMatrix.h"
33 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
34 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
35 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
36 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
37 #include "fxbarcode/datamatrix/BC_Encoder.h"
38 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
39 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
40 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
41 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
42 #include "third_party/base/ptr_util.h"
43
44 namespace {
45
// Codeword appended as the first padding character.
constexpr wchar_t kPad = 129;

// Codewords written in place of a recognized macro 05 / macro 06 header.
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Six-character message prefixes that select a macro codeword: "[)>", the
// character with octal value 036, then "05" or "06". The literals are split so
// the digits cannot be read as part of the escape sequence.
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// Final message character that must be present for a macro header to be
// honored.
constexpr wchar_t kMacroTrailer = 0x0004;
56
57 constexpr size_t kEncoderCount =
58 static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1;
59 static_assert(kEncoderCount == 6, "Bad encoder count");
60
// Offsets |ch| by a position-dependent pseudo-random amount in [1, 253]
// (for non-negative positions) and wraps sums above 254 back by subtracting
// 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = (149 * codewordPosition) % 253 + 1;
  int32_t randomized = ch + offset;
  if (randomized > 254)
    randomized -= 254;
  return static_cast<wchar_t>(randomized);
}
67
FindMinimums(const std::array<float,kEncoderCount> & charCounts,std::array<int32_t,kEncoderCount> * intCharCounts,std::array<uint8_t,kEncoderCount> * mins)68 int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts,
69 std::array<int32_t, kEncoderCount>* intCharCounts,
70 std::array<uint8_t, kEncoderCount>* mins) {
71 int32_t min = std::numeric_limits<int32_t>::max();
72 for (size_t i = 0; i < kEncoderCount; ++i) {
73 int32_t current = static_cast<int32_t>(ceil(charCounts[i]));
74 (*intCharCounts)[i] = current;
75 if (min > current) {
76 min = current;
77 for (auto& m : *mins)
78 m = 0;
79 }
80 if (min == current)
81 (*mins)[i]++;
82 }
83 return min;
84 }
85
GetMinimumCount(const std::array<uint8_t,kEncoderCount> & mins)86 int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) {
87 int32_t count = 0;
88 for (const auto& m : mins)
89 count += m;
90 return count;
91 }
92
// True for a space, a decimal digit or an upper-case letter A-Z.
bool IsNativeC40(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'A' && ch <= 'Z';
}
96
// True for a space, a decimal digit or a lower-case letter a-z.
bool IsNativeText(wchar_t ch) {
  if (ch == ' ')
    return true;
  if (ch >= '0' && ch <= '9')
    return true;
  return ch >= 'a' && ch <= 'z';
}
100
// True for carriage return, '*' or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
104
IsNativeX12(wchar_t ch)105 bool IsNativeX12(wchar_t ch) {
106 return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
107 (ch >= 'A' && ch <= 'Z');
108 }
109
// True when |ch| lies in the inclusive range from ' ' to '^'.
bool IsNativeEDIFACT(wchar_t ch) {
  const bool out_of_range = ch < ' ' || ch > '^';
  return !out_of_range;
}
113
EncoderIndex(CBC_HighLevelEncoder::Encoding encoding)114 size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) {
115 ASSERT(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN);
116 return static_cast<size_t>(encoding);
117 }
118
119 } // namespace
120
121 // static
// Converts |msg| into a string of codewords, padded up to the data capacity of
// the symbol selected by the encoder context. Returns an empty string when the
// message is too long, contains characters outside ISO 8859-1, an encoder
// fails, or no symbol info can be determined.
WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) {
  // Per spec. Alpha numeric input is even shorter.
  static constexpr size_t kMaxNumericInputLength = 3116;

  // Anything longer can never be encoded, so bail out before doing any work.
  if (msg.GetLength() > kMaxNumericInputLength)
    return WideString();

  CBC_EncoderContext context(msg);
  if (context.HasCharactersOutsideISO88591Encoding())
    return WideString();

  // A message that ends with the macro trailer and starts with one of the two
  // macro headers gets a single macro codeword instead of the header text.
  if (msg.Back() == kMacroTrailer) {
    const WideString header = msg.First(6);
    const bool is_macro05 = header == kMacro05Header;
    if (is_macro05 || header == kMacro06Header) {
      context.writeCodeword(is_macro05 ? kMacro05 : kMacro06);
      // NOTE(review): presumably excludes the trailing characters from
      // encoding - confirm against CBC_EncoderContext.
      context.setSkipAtEnd(2);
      context.m_pos += 6;  // Skip past the six header characters.
    }
  }

  // One encoder per encoding, stored so that EncoderIndex() selects the
  // matching entry.
  std::vector<std::unique_ptr<CBC_Encoder>> encoders;
  encoders.push_back(pdfium::MakeUnique<CBC_ASCIIEncoder>());
  encoders.push_back(pdfium::MakeUnique<CBC_C40Encoder>());
  encoders.push_back(pdfium::MakeUnique<CBC_TextEncoder>());
  encoders.push_back(pdfium::MakeUnique<CBC_X12Encoder>());
  encoders.push_back(pdfium::MakeUnique<CBC_EdifactEncoder>());
  encoders.push_back(pdfium::MakeUnique<CBC_Base256Encoder>());

  // Run the active encoder until the input is consumed, switching encoders
  // whenever the context signals a new encoding.
  Encoding mode = Encoding::ASCII;
  while (context.hasMoreCharacters()) {
    CBC_Encoder* encoder = encoders[EncoderIndex(mode)].get();
    if (!encoder->Encode(&context))
      return WideString();

    if (context.m_newEncoding == Encoding::UNKNOWN)
      continue;

    mode = context.m_newEncoding;
    context.ResetEncoderSignal();
  }

  // The codeword count is sampled before the symbol info is updated.
  const size_t encoded_length = context.m_codewords.GetLength();
  if (!context.UpdateSymbolInfo())
    return WideString();

  const size_t capacity = context.m_symbolInfo->dataCapacity();
  const bool ended_in_latched_mode =
      mode != Encoding::ASCII && mode != Encoding::BASE256;
  // NOTE(review): 0x00fe looks like the codeword that returns to ASCII before
  // padding - confirm against the spec.
  if (ended_in_latched_mode && encoded_length < capacity)
    context.writeCodeword(0x00fe);

  // Fill the remaining capacity: a plain pad codeword first, then randomized
  // pad codewords keyed on their 1-based position.
  WideString result = context.m_codewords;
  if (result.GetLength() < capacity) {
    result += kPad;
    while (result.GetLength() < capacity) {
      result +=
          Randomize253State(kPad, static_cast<int32_t>(result.GetLength() + 1));
    }
  }

  ASSERT(!result.IsEmpty());
  return result;
}
183
184 // static
LookAheadTest(const WideString & msg,size_t startpos,CBC_HighLevelEncoder::Encoding currentMode)185 CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest(
186 const WideString& msg,
187 size_t startpos,
188 CBC_HighLevelEncoder::Encoding currentMode) {
189 if (startpos >= msg.GetLength())
190 return currentMode;
191
192 std::array<float, kEncoderCount> charCounts;
193 if (currentMode == Encoding::ASCII) {
194 charCounts = {0, 1, 1, 1, 1, 1.25f};
195 } else {
196 charCounts = {1, 2, 2, 2, 2, 2.25f};
197 charCounts[EncoderIndex(currentMode)] = 0;
198 }
199
200 size_t charsProcessed = 0;
201 while (true) {
202 if ((startpos + charsProcessed) == msg.GetLength()) {
203 std::array<int32_t, kEncoderCount> intCharCounts;
204 std::array<uint8_t, kEncoderCount> mins;
205 int32_t min = FindMinimums(charCounts, &intCharCounts, &mins);
206 if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min)
207 return Encoding::ASCII;
208 const int32_t minCount = GetMinimumCount(mins);
209 if (minCount == 1) {
210 if (mins[EncoderIndex(Encoding::BASE256)] > 0)
211 return Encoding::BASE256;
212 if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
213 return Encoding::EDIFACT;
214 if (mins[EncoderIndex(Encoding::TEXT)] > 0)
215 return Encoding::TEXT;
216 if (mins[EncoderIndex(Encoding::X12)] > 0)
217 return Encoding::X12;
218 }
219 return Encoding::C40;
220 }
221
222 wchar_t c = msg[startpos + charsProcessed];
223 charsProcessed++;
224 {
225 auto& count = charCounts[EncoderIndex(Encoding::ASCII)];
226 if (FXSYS_IsDecimalDigit(c))
227 count += 0.5;
228 else if (IsExtendedASCII(c))
229 count = ceilf(count) + 2;
230 else
231 count = ceilf(count) + 1;
232 }
233
234 {
235 auto& count = charCounts[EncoderIndex(Encoding::C40)];
236 if (IsNativeC40(c))
237 count += 2.0f / 3.0f;
238 else if (IsExtendedASCII(c))
239 count += 8.0f / 3.0f;
240 else
241 count += 4.0f / 3.0f;
242 }
243
244 {
245 auto& count = charCounts[EncoderIndex(Encoding::TEXT)];
246 if (IsNativeText(c))
247 count += 2.0f / 3.0f;
248 else if (IsExtendedASCII(c))
249 count += 8.0f / 3.0f;
250 else
251 count += 4.0f / 3.0f;
252 }
253
254 {
255 auto& count = charCounts[EncoderIndex(Encoding::X12)];
256 if (IsNativeX12(c))
257 count += 2.0f / 3.0f;
258 else if (IsExtendedASCII(c))
259 count += 13.0f / 3.0f;
260 else
261 count += 10.0f / 3.0f;
262 }
263
264 {
265 auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)];
266 if (IsNativeEDIFACT(c))
267 count += 3.0f / 4.0f;
268 else if (IsExtendedASCII(c))
269 count += 17.0f / 4.0f;
270 else
271 count += 13.0f / 4.0f;
272 }
273
274 charCounts[EncoderIndex(Encoding::BASE256)]++;
275 if (charsProcessed < 4)
276 continue;
277
278 std::array<int32_t, kEncoderCount> intCharCounts;
279 std::array<uint8_t, kEncoderCount> mins;
280 FindMinimums(charCounts, &intCharCounts, &mins);
281 int32_t minCount = GetMinimumCount(mins);
282 int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)];
283 int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)];
284 int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)];
285 int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)];
286 int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)];
287 int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)];
288 int32_t bet_min = std::min({base256_count, editfact_count, text_count});
289 if (ascii_count < bet_min && ascii_count < c40_count &&
290 ascii_count < x12_count) {
291 return Encoding::ASCII;
292 }
293 if (base256_count < ascii_count ||
294 (mins[EncoderIndex(Encoding::C40)] +
295 mins[EncoderIndex(Encoding::TEXT)] +
296 mins[EncoderIndex(Encoding::X12)] +
297 mins[EncoderIndex(Encoding::EDIFACT)]) == 0) {
298 return Encoding::BASE256;
299 }
300 if (minCount == 1) {
301 if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
302 return Encoding::EDIFACT;
303 if (mins[EncoderIndex(Encoding::TEXT)] > 0)
304 return Encoding::TEXT;
305 if (mins[EncoderIndex(Encoding::X12)] > 0)
306 return Encoding::X12;
307 }
308 if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) {
309 if (c40_count < x12_count)
310 return Encoding::C40;
311 if (c40_count == x12_count) {
312 size_t p = startpos + charsProcessed + 1;
313 while (p < msg.GetLength()) {
314 wchar_t tc = msg[p];
315 if (IsX12TermSep(tc))
316 return Encoding::X12;
317 if (!IsNativeX12(tc))
318 break;
319 p++;
320 }
321 return Encoding::C40;
322 }
323 }
324 }
325 }
326
327 // static
IsExtendedASCII(wchar_t ch)328 bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) {
329 return ch >= 128 && ch <= 255;
330 }
331