1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8 * Copyright 2006-2007 Jeremias Maerki.
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
24
25 #include <algorithm>
26 #include <array>
27 #include <limits>
28 #include <memory>
29 #include <vector>
30
31 #include "core/fxcrt/check.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
34 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
35 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
36 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
37 #include "fxbarcode/datamatrix/BC_Encoder.h"
38 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
39 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
40 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
41 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
42
43 namespace {
44
// Codeword used as the first padding codeword and as the base value that is
// randomized for every further padding codeword.
constexpr wchar_t kPad = 129;

// Codewords written in place of a recognized "05" / "06" macro header.
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Six-character message prefixes that identify the two macro formats. The
// literals are split so the octal escape \036 is not merged with the digits
// that follow it.
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// Last character a message must end with before macro headers are checked.
constexpr wchar_t kMacroTrailer = 0x0004;
55
56 constexpr size_t kEncoderCount =
57 static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1;
58 static_assert(kEncoderCount == 6, "Bad encoder count");
59
// Returns |ch| shifted by a position-dependent pseudo-random offset.
// The offset is ((149 * codewordPosition) % 253) + 1; a sum above 254 is
// wrapped back into range by subtracting 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = (149 * codewordPosition) % 253 + 1;
  int32_t randomized = ch + offset;
  if (randomized > 254)
    randomized -= 254;
  return static_cast<wchar_t>(randomized);
}
66
FindMinimums(const std::array<float,kEncoderCount> & charCounts,std::array<int32_t,kEncoderCount> * intCharCounts,std::array<uint8_t,kEncoderCount> * mins)67 int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts,
68 std::array<int32_t, kEncoderCount>* intCharCounts,
69 std::array<uint8_t, kEncoderCount>* mins) {
70 int32_t min = std::numeric_limits<int32_t>::max();
71 for (size_t i = 0; i < kEncoderCount; ++i) {
72 int32_t current = static_cast<int32_t>(ceil(charCounts[i]));
73 (*intCharCounts)[i] = current;
74 if (min > current) {
75 min = current;
76 for (auto& m : *mins)
77 m = 0;
78 }
79 if (min == current)
80 (*mins)[i]++;
81 }
82 return min;
83 }
84
GetMinimumCount(const std::array<uint8_t,kEncoderCount> & mins)85 int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) {
86 int32_t count = 0;
87 for (const auto& m : mins)
88 count += m;
89 return count;
90 }
91
IsNativeC40(wchar_t ch)92 bool IsNativeC40(wchar_t ch) {
93 return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsUpperASCII(ch);
94 }
95
IsNativeText(wchar_t ch)96 bool IsNativeText(wchar_t ch) {
97 return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsLowerASCII(ch);
98 }
99
// Returns true when |ch| is one of the three X12 terminator/separator
// characters: carriage return, '*', or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
103
IsNativeX12(wchar_t ch)104 bool IsNativeX12(wchar_t ch) {
105 return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
106 FXSYS_IsUpperASCII(ch);
107 }
108
// Returns true when |ch| lies in the inclusive range from ' ' to '^'.
bool IsNativeEDIFACT(wchar_t ch) {
  return !(ch < ' ' || ch > '^');
}
112
EncoderIndex(CBC_HighLevelEncoder::Encoding encoding)113 size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) {
114 DCHECK(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN);
115 return static_cast<size_t>(encoding);
116 }
117
118 } // namespace
119
120 // static
EncodeHighLevel(const WideString & msg)121 WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) {
122 // Per spec. Alpha numeric input is even shorter.
123 static constexpr size_t kMaxNumericInputLength = 3116;
124
125 // Exit early if the input is too long. It will fail no matter what.
126 if (msg.GetLength() > kMaxNumericInputLength)
127 return WideString();
128
129 CBC_EncoderContext context(msg);
130 if (context.HasCharactersOutsideISO88591Encoding())
131 return WideString();
132
133 if (msg.Back() == kMacroTrailer) {
134 WideString left = msg.First(6);
135 if (left == kMacro05Header) {
136 context.writeCodeword(kMacro05);
137 context.setSkipAtEnd(2);
138 context.m_pos += 6;
139 } else if (left == kMacro06Header) {
140 context.writeCodeword(kMacro06);
141 context.setSkipAtEnd(2);
142 context.m_pos += 6;
143 }
144 }
145
146 std::vector<std::unique_ptr<CBC_Encoder>> encoders;
147 encoders.push_back(std::make_unique<CBC_ASCIIEncoder>());
148 encoders.push_back(std::make_unique<CBC_C40Encoder>());
149 encoders.push_back(std::make_unique<CBC_TextEncoder>());
150 encoders.push_back(std::make_unique<CBC_X12Encoder>());
151 encoders.push_back(std::make_unique<CBC_EdifactEncoder>());
152 encoders.push_back(std::make_unique<CBC_Base256Encoder>());
153 Encoding encodingMode = Encoding::ASCII;
154 while (context.hasMoreCharacters()) {
155 if (!encoders[EncoderIndex(encodingMode)]->Encode(&context))
156 return WideString();
157
158 if (context.m_newEncoding != Encoding::UNKNOWN) {
159 encodingMode = context.m_newEncoding;
160 context.ResetEncoderSignal();
161 }
162 }
163 size_t len = context.m_codewords.GetLength();
164 if (!context.UpdateSymbolInfo())
165 return WideString();
166
167 size_t capacity = context.m_symbolInfo->data_capacity();
168 if (len < capacity) {
169 if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256)
170 context.writeCodeword(0x00fe);
171 }
172 WideString codewords = context.m_codewords;
173 if (codewords.GetLength() < capacity)
174 codewords += kPad;
175
176 while (codewords.GetLength() < capacity)
177 codewords += Randomize253State(kPad, codewords.GetLength() + 1);
178
179 DCHECK(!codewords.IsEmpty());
180 return codewords;
181 }
182
183 // static
LookAheadTest(const WideString & msg,size_t startpos,CBC_HighLevelEncoder::Encoding currentMode)184 CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest(
185 const WideString& msg,
186 size_t startpos,
187 CBC_HighLevelEncoder::Encoding currentMode) {
188 if (startpos >= msg.GetLength())
189 return currentMode;
190
191 std::array<float, kEncoderCount> charCounts;
192 if (currentMode == Encoding::ASCII) {
193 charCounts = {0, 1, 1, 1, 1, 1.25f};
194 } else {
195 charCounts = {1, 2, 2, 2, 2, 2.25f};
196 charCounts[EncoderIndex(currentMode)] = 0;
197 }
198
199 size_t charsProcessed = 0;
200 while (true) {
201 if ((startpos + charsProcessed) == msg.GetLength()) {
202 std::array<int32_t, kEncoderCount> intCharCounts;
203 std::array<uint8_t, kEncoderCount> mins;
204 int32_t min = FindMinimums(charCounts, &intCharCounts, &mins);
205 if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min)
206 return Encoding::ASCII;
207 const int32_t minCount = GetMinimumCount(mins);
208 if (minCount == 1) {
209 if (mins[EncoderIndex(Encoding::BASE256)] > 0)
210 return Encoding::BASE256;
211 if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
212 return Encoding::EDIFACT;
213 if (mins[EncoderIndex(Encoding::TEXT)] > 0)
214 return Encoding::TEXT;
215 if (mins[EncoderIndex(Encoding::X12)] > 0)
216 return Encoding::X12;
217 }
218 return Encoding::C40;
219 }
220
221 wchar_t c = msg[startpos + charsProcessed];
222 charsProcessed++;
223 {
224 auto& count = charCounts[EncoderIndex(Encoding::ASCII)];
225 if (FXSYS_IsDecimalDigit(c))
226 count += 0.5;
227 else if (IsExtendedASCII(c))
228 count = ceilf(count) + 2;
229 else
230 count = ceilf(count) + 1;
231 }
232
233 {
234 auto& count = charCounts[EncoderIndex(Encoding::C40)];
235 if (IsNativeC40(c))
236 count += 2.0f / 3.0f;
237 else if (IsExtendedASCII(c))
238 count += 8.0f / 3.0f;
239 else
240 count += 4.0f / 3.0f;
241 }
242
243 {
244 auto& count = charCounts[EncoderIndex(Encoding::TEXT)];
245 if (IsNativeText(c))
246 count += 2.0f / 3.0f;
247 else if (IsExtendedASCII(c))
248 count += 8.0f / 3.0f;
249 else
250 count += 4.0f / 3.0f;
251 }
252
253 {
254 auto& count = charCounts[EncoderIndex(Encoding::X12)];
255 if (IsNativeX12(c))
256 count += 2.0f / 3.0f;
257 else if (IsExtendedASCII(c))
258 count += 13.0f / 3.0f;
259 else
260 count += 10.0f / 3.0f;
261 }
262
263 {
264 auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)];
265 if (IsNativeEDIFACT(c))
266 count += 3.0f / 4.0f;
267 else if (IsExtendedASCII(c))
268 count += 17.0f / 4.0f;
269 else
270 count += 13.0f / 4.0f;
271 }
272
273 charCounts[EncoderIndex(Encoding::BASE256)]++;
274 if (charsProcessed < 4)
275 continue;
276
277 std::array<int32_t, kEncoderCount> intCharCounts;
278 std::array<uint8_t, kEncoderCount> mins;
279 FindMinimums(charCounts, &intCharCounts, &mins);
280 int32_t minCount = GetMinimumCount(mins);
281 int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)];
282 int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)];
283 int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)];
284 int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)];
285 int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)];
286 int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)];
287 int32_t bet_min = std::min({base256_count, editfact_count, text_count});
288 if (ascii_count < bet_min && ascii_count < c40_count &&
289 ascii_count < x12_count) {
290 return Encoding::ASCII;
291 }
292 if (base256_count < ascii_count ||
293 (mins[EncoderIndex(Encoding::C40)] +
294 mins[EncoderIndex(Encoding::TEXT)] +
295 mins[EncoderIndex(Encoding::X12)] +
296 mins[EncoderIndex(Encoding::EDIFACT)]) == 0) {
297 return Encoding::BASE256;
298 }
299 if (minCount == 1) {
300 if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
301 return Encoding::EDIFACT;
302 if (mins[EncoderIndex(Encoding::TEXT)] > 0)
303 return Encoding::TEXT;
304 if (mins[EncoderIndex(Encoding::X12)] > 0)
305 return Encoding::X12;
306 }
307 if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) {
308 if (c40_count < x12_count)
309 return Encoding::C40;
310 if (c40_count == x12_count) {
311 size_t p = startpos + charsProcessed + 1;
312 while (p < msg.GetLength()) {
313 wchar_t tc = msg[p];
314 if (IsX12TermSep(tc))
315 return Encoding::X12;
316 if (!IsNativeX12(tc))
317 break;
318 p++;
319 }
320 return Encoding::C40;
321 }
322 }
323 }
324 }
325
326 // static
IsExtendedASCII(wchar_t ch)327 bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) {
328 return ch >= 128 && ch <= 255;
329 }
330