1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23 #include "xfa/fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24
25 #include "third_party/bigint/BigIntegerLibrary.hh"
26 #include "xfa/fxbarcode/BC_UtilCodingConvert.h"
27 #include "xfa/fxbarcode/pdf417/BC_PDF417Compaction.h"
28 #include "xfa/fxbarcode/utils.h"
29
// Text-compaction sub-mode identifiers used by encodeText(). The fourth
// sub-mode, SUBMODE_PUNCTUATION, is a class static defined below.
#define SUBMODE_ALPHA 0
#define SUBMODE_LOWER 1
#define SUBMODE_MIXED 2

// Identifiers for the compaction mode the encoder is currently in.
int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
// Codeword values appended to the output to switch compaction mode.
// encodeBinary() emits LATCH_TO_BYTE ahead of six-byte groups,
// LATCH_TO_BYTE_PADDED ahead of leftover bytes, and SHIFT_TO_BYTE for a single
// byte embedded in text compaction.
int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
// Character codes of the mixed sub-mode; the array index is the sub-mode
// value. A 0 entry marks a slot with no character (Inverse() skips zeros).
uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
    35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0};
// Character codes of the punctuation sub-mode, laid out the same way as
// TEXT_MIXED_RAW.
uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
    59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58,
    10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0};
// Inverse lookup tables filled in by Inverse(): indexed by character code
// (0..127), holding the sub-mode value or -1 when the character is absent.
// Until Inverse() has run they are zero-initialized.
int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
51
// One-time setup: builds the MIXED and PUNCTUATION reverse lookup tables by
// delegating to Inverse().
void CBC_PDF417HighLevelEncoder::Initialize() {
  Inverse();
}
55
// Counterpart of Initialize(). Intentionally empty: the lookup tables are
// static arrays, so there is nothing to release.
void CBC_PDF417HighLevelEncoder::Finalize() {}
57
encodeHighLevel(CFX_WideString wideMsg,Compaction compaction,int32_t & e)58 CFX_WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(
59 CFX_WideString wideMsg,
60 Compaction compaction,
61 int32_t& e) {
62 CFX_ByteString bytes;
63 CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
64 CFX_WideString msg;
65 int32_t len = bytes.GetLength();
66 for (int32_t i = 0; i < len; i++) {
67 FX_WCHAR ch = (FX_WCHAR)(bytes.GetAt(i) & 0xff);
68 if (ch == '?' && bytes.GetAt(i) != '?') {
69 e = BCExceptionCharactersOutsideISO88591Encoding;
70 return CFX_WideString();
71 }
72 msg += ch;
73 }
74 CFX_ArrayTemplate<uint8_t> byteArr;
75 for (int32_t k = 0; k < bytes.GetLength(); k++) {
76 byteArr.Add(bytes.GetAt(k));
77 }
78 CFX_WideString sb;
79 len = msg.GetLength();
80 int32_t p = 0;
81 int32_t textSubMode = SUBMODE_ALPHA;
82 if (compaction == TEXT) {
83 encodeText(msg, p, len, sb, textSubMode);
84 } else if (compaction == BYTES) {
85 encodeBinary(&byteArr, p, byteArr.GetSize(), BYTE_COMPACTION, sb);
86 } else if (compaction == NUMERIC) {
87 sb += (FX_WCHAR)LATCH_TO_NUMERIC;
88 encodeNumeric(msg, p, len, sb);
89 } else {
90 int32_t encodingMode = LATCH_TO_TEXT;
91 while (p < len) {
92 int32_t n = determineConsecutiveDigitCount(msg, p);
93 if (n >= 13) {
94 sb += (FX_WCHAR)LATCH_TO_NUMERIC;
95 encodingMode = NUMERIC_COMPACTION;
96 textSubMode = SUBMODE_ALPHA;
97 encodeNumeric(msg, p, n, sb);
98 p += n;
99 } else {
100 int32_t t = determineConsecutiveTextCount(msg, p);
101 if (t >= 5 || n == len) {
102 if (encodingMode != TEXT_COMPACTION) {
103 sb += (FX_WCHAR)LATCH_TO_TEXT;
104 encodingMode = TEXT_COMPACTION;
105 textSubMode = SUBMODE_ALPHA;
106 }
107 textSubMode = encodeText(msg, p, t, sb, textSubMode);
108 p += t;
109 } else {
110 int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
111 if (e != BCExceptionNO)
112 return L" ";
113 if (b == 0) {
114 b = 1;
115 }
116 if (b == 1 && encodingMode == TEXT_COMPACTION) {
117 encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
118 } else {
119 encodeBinary(&byteArr, p, b, encodingMode, sb);
120 encodingMode = BYTE_COMPACTION;
121 textSubMode = SUBMODE_ALPHA;
122 }
123 p += b;
124 }
125 }
126 }
127 }
128 return sb;
129 }
130
Inverse()131 void CBC_PDF417HighLevelEncoder::Inverse() {
132 for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
133 MIXED[l] = -1;
134
135 for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
136 uint8_t b = TEXT_MIXED_RAW[i];
137 if (b != 0)
138 MIXED[b] = i;
139 }
140
141 for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
142 PUNCTUATION[l] = -1;
143
144 for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
145 uint8_t b = TEXT_PUNCTUATION_RAW[i];
146 if (b != 0)
147 PUNCTUATION[b] = i;
148 }
149 }
150
encodeText(CFX_WideString msg,int32_t startpos,int32_t count,CFX_WideString & sb,int32_t initialSubmode)151 int32_t CBC_PDF417HighLevelEncoder::encodeText(CFX_WideString msg,
152 int32_t startpos,
153 int32_t count,
154 CFX_WideString& sb,
155 int32_t initialSubmode) {
156 CFX_WideString tmp;
157 int32_t submode = initialSubmode;
158 int32_t idx = 0;
159 while (true) {
160 FX_WCHAR ch = msg.GetAt(startpos + idx);
161 switch (submode) {
162 case SUBMODE_ALPHA:
163 if (isAlphaUpper(ch)) {
164 if (ch == ' ') {
165 tmp += (FX_WCHAR)26;
166 } else {
167 tmp += (FX_WCHAR)(ch - 65);
168 }
169 } else {
170 if (isAlphaLower(ch)) {
171 submode = SUBMODE_LOWER;
172 tmp += (FX_WCHAR)27;
173 continue;
174 } else if (isMixed(ch)) {
175 submode = SUBMODE_MIXED;
176 tmp += (FX_WCHAR)28;
177 continue;
178 } else {
179 tmp += (FX_WCHAR)29;
180 tmp += PUNCTUATION[ch];
181 break;
182 }
183 }
184 break;
185 case SUBMODE_LOWER:
186 if (isAlphaLower(ch)) {
187 if (ch == ' ') {
188 tmp += (FX_WCHAR)26;
189 } else {
190 tmp += (FX_WCHAR)(ch - 97);
191 }
192 } else {
193 if (isAlphaUpper(ch)) {
194 tmp += (FX_WCHAR)27;
195 tmp += (FX_WCHAR)(ch - 65);
196 break;
197 } else if (isMixed(ch)) {
198 submode = SUBMODE_MIXED;
199 tmp += (FX_WCHAR)28;
200 continue;
201 } else {
202 tmp += (FX_WCHAR)29;
203 tmp += PUNCTUATION[ch];
204 break;
205 }
206 }
207 break;
208 case SUBMODE_MIXED:
209 if (isMixed(ch)) {
210 tmp += MIXED[ch];
211 } else {
212 if (isAlphaUpper(ch)) {
213 submode = SUBMODE_ALPHA;
214 tmp += (FX_WCHAR)28;
215 continue;
216 } else if (isAlphaLower(ch)) {
217 submode = SUBMODE_LOWER;
218 tmp += (FX_WCHAR)27;
219 continue;
220 } else {
221 if (startpos + idx + 1 < count) {
222 FX_WCHAR next = msg.GetAt(startpos + idx + 1);
223 if (isPunctuation(next)) {
224 submode = SUBMODE_PUNCTUATION;
225 tmp += (FX_WCHAR)25;
226 continue;
227 }
228 }
229 tmp += (FX_WCHAR)29;
230 tmp += PUNCTUATION[ch];
231 }
232 }
233 break;
234 default:
235 if (isPunctuation(ch)) {
236 tmp += PUNCTUATION[ch];
237 } else {
238 submode = SUBMODE_ALPHA;
239 tmp += (FX_WCHAR)29;
240 continue;
241 }
242 }
243 idx++;
244 if (idx >= count) {
245 break;
246 }
247 }
248 FX_WCHAR h = 0;
249 int32_t len = tmp.GetLength();
250 for (int32_t i = 0; i < len; i++) {
251 bool odd = (i % 2) != 0;
252 if (odd) {
253 h = (FX_WCHAR)((h * 30) + tmp.GetAt(i));
254 sb += h;
255 } else {
256 h = tmp.GetAt(i);
257 }
258 }
259 if ((len % 2) != 0) {
260 sb += (FX_WCHAR)((h * 30) + 29);
261 }
262 return submode;
263 }
encodeBinary(CFX_ArrayTemplate<uint8_t> * bytes,int32_t startpos,int32_t count,int32_t startmode,CFX_WideString & sb)264 void CBC_PDF417HighLevelEncoder::encodeBinary(CFX_ArrayTemplate<uint8_t>* bytes,
265 int32_t startpos,
266 int32_t count,
267 int32_t startmode,
268 CFX_WideString& sb) {
269 if (count == 1 && startmode == TEXT_COMPACTION) {
270 sb += (FX_WCHAR)SHIFT_TO_BYTE;
271 }
272 int32_t idx = startpos;
273 int32_t i = 0;
274 if (count >= 6) {
275 sb += (FX_WCHAR)LATCH_TO_BYTE;
276 FX_WCHAR chars[5];
277 while ((startpos + count - idx) >= 6) {
278 int64_t t = 0;
279 for (i = 0; i < 6; i++) {
280 t <<= 8;
281 t += bytes->GetAt(idx + i) & 0xff;
282 }
283 for (i = 0; i < 5; i++) {
284 chars[i] = (FX_WCHAR)(t % 900);
285 t /= 900;
286 }
287 for (i = 4; i >= 0; i--) {
288 sb += (chars[i]);
289 }
290 idx += 6;
291 }
292 }
293 if (idx < startpos + count) {
294 sb += (FX_WCHAR)LATCH_TO_BYTE_PADDED;
295 }
296 for (i = idx; i < startpos + count; i++) {
297 int32_t ch = bytes->GetAt(i) & 0xff;
298 sb += (FX_WCHAR)ch;
299 }
300 }
encodeNumeric(CFX_WideString msg,int32_t startpos,int32_t count,CFX_WideString & sb)301 void CBC_PDF417HighLevelEncoder::encodeNumeric(CFX_WideString msg,
302 int32_t startpos,
303 int32_t count,
304 CFX_WideString& sb) {
305 int32_t idx = 0;
306 BigInteger num900 = 900;
307 while (idx < count) {
308 CFX_WideString tmp;
309 int32_t len = 44 < count - idx ? 44 : count - idx;
310 CFX_ByteString part =
311 ((FX_WCHAR)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
312 BigInteger bigint = stringToBigInteger(part.c_str());
313 do {
314 int32_t c = (bigint % num900).toInt();
315 tmp += (FX_WCHAR)(c);
316 bigint = bigint / num900;
317 } while (!bigint.isZero());
318 for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
319 sb += tmp.GetAt(i);
320 }
321 idx += len;
322 }
323 }
isDigit(FX_WCHAR ch)324 bool CBC_PDF417HighLevelEncoder::isDigit(FX_WCHAR ch) {
325 return ch >= '0' && ch <= '9';
326 }
isAlphaUpper(FX_WCHAR ch)327 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(FX_WCHAR ch) {
328 return ch == ' ' || (ch >= 'A' && ch <= 'Z');
329 }
isAlphaLower(FX_WCHAR ch)330 bool CBC_PDF417HighLevelEncoder::isAlphaLower(FX_WCHAR ch) {
331 return ch == ' ' || (ch >= 'a' && ch <= 'z');
332 }
isMixed(FX_WCHAR ch)333 bool CBC_PDF417HighLevelEncoder::isMixed(FX_WCHAR ch) {
334 return MIXED[ch] != -1;
335 }
isPunctuation(FX_WCHAR ch)336 bool CBC_PDF417HighLevelEncoder::isPunctuation(FX_WCHAR ch) {
337 return PUNCTUATION[ch] != -1;
338 }
isText(FX_WCHAR ch)339 bool CBC_PDF417HighLevelEncoder::isText(FX_WCHAR ch) {
340 return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
341 }
determineConsecutiveDigitCount(CFX_WideString msg,int32_t startpos)342 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
343 CFX_WideString msg,
344 int32_t startpos) {
345 int32_t count = 0;
346 int32_t len = msg.GetLength();
347 int32_t idx = startpos;
348 if (idx < len) {
349 FX_WCHAR ch = msg.GetAt(idx);
350 while (isDigit(ch) && idx < len) {
351 count++;
352 idx++;
353 if (idx < len) {
354 ch = msg.GetAt(idx);
355 }
356 }
357 }
358 return count;
359 }
determineConsecutiveTextCount(CFX_WideString msg,int32_t startpos)360 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
361 CFX_WideString msg,
362 int32_t startpos) {
363 int32_t len = msg.GetLength();
364 int32_t idx = startpos;
365 while (idx < len) {
366 FX_WCHAR ch = msg.GetAt(idx);
367 int32_t numericCount = 0;
368 while (numericCount < 13 && isDigit(ch) && idx < len) {
369 numericCount++;
370 idx++;
371 if (idx < len) {
372 ch = msg.GetAt(idx);
373 }
374 }
375 if (numericCount >= 13) {
376 return idx - startpos - numericCount;
377 }
378 if (numericCount > 0) {
379 continue;
380 }
381 ch = msg.GetAt(idx);
382 if (!isText(ch)) {
383 break;
384 }
385 idx++;
386 }
387 return idx - startpos;
388 }
determineConsecutiveBinaryCount(CFX_WideString msg,CFX_ArrayTemplate<uint8_t> * bytes,int32_t startpos,int32_t & e)389 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
390 CFX_WideString msg,
391 CFX_ArrayTemplate<uint8_t>* bytes,
392 int32_t startpos,
393 int32_t& e) {
394 int32_t len = msg.GetLength();
395 int32_t idx = startpos;
396 while (idx < len) {
397 FX_WCHAR ch = msg.GetAt(idx);
398 int32_t numericCount = 0;
399 while (numericCount < 13 && isDigit(ch)) {
400 numericCount++;
401 int32_t i = idx + numericCount;
402 if (i >= len) {
403 break;
404 }
405 ch = msg.GetAt(i);
406 }
407 if (numericCount >= 13) {
408 return idx - startpos;
409 }
410 int32_t textCount = 0;
411 while (textCount < 5 && isText(ch)) {
412 textCount++;
413 int32_t i = idx + textCount;
414 if (i >= len) {
415 break;
416 }
417 ch = msg.GetAt(i);
418 }
419 if (textCount >= 5) {
420 return idx - startpos;
421 }
422 ch = msg.GetAt(idx);
423 if (bytes->GetAt(idx) == 63 && ch != '?') {
424 e = BCExceptionNonEncodableCharacterDetected;
425 return -1;
426 }
427 idx++;
428 }
429 return idx - startpos;
430 }
431