1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8 * Copyright 2006 Jeremias Maerki in part, and ZXing Authors in part
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 */
22
23 #include "fxbarcode/pdf417/BC_PDF417HighLevelEncoder.h"
24
25 #include "fxbarcode/BC_UtilCodingConvert.h"
26 #include "fxbarcode/pdf417/BC_PDF417Compaction.h"
27 #include "fxbarcode/utils.h"
28 #include "third_party/bigint/BigIntegerLibrary.hh"
29
30 #define SUBMODE_ALPHA 0
31 #define SUBMODE_LOWER 1
32 #define SUBMODE_MIXED 2
33
34 int32_t CBC_PDF417HighLevelEncoder::TEXT_COMPACTION = 0;
35 int32_t CBC_PDF417HighLevelEncoder::BYTE_COMPACTION = 1;
36 int32_t CBC_PDF417HighLevelEncoder::NUMERIC_COMPACTION = 2;
37 int32_t CBC_PDF417HighLevelEncoder::SUBMODE_PUNCTUATION = 3;
38 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_TEXT = 900;
39 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE_PADDED = 901;
40 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_NUMERIC = 902;
41 int32_t CBC_PDF417HighLevelEncoder::SHIFT_TO_BYTE = 913;
42 int32_t CBC_PDF417HighLevelEncoder::LATCH_TO_BYTE = 924;
43 uint8_t CBC_PDF417HighLevelEncoder::TEXT_MIXED_RAW[] = {
44 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 38, 13, 9, 44, 58,
45 35, 45, 46, 36, 47, 43, 37, 42, 61, 94, 0, 32, 0, 0, 0};
46 uint8_t CBC_PDF417HighLevelEncoder::TEXT_PUNCTUATION_RAW[] = {
47 59, 60, 62, 64, 91, 92, 93, 95, 96, 126, 33, 13, 9, 44, 58,
48 10, 45, 46, 36, 47, 34, 124, 42, 40, 41, 63, 123, 125, 39, 0};
49 int32_t CBC_PDF417HighLevelEncoder::MIXED[128] = {0};
50 int32_t CBC_PDF417HighLevelEncoder::PUNCTUATION[128] = {0};
51
Initialize()52 void CBC_PDF417HighLevelEncoder::Initialize() {
53 Inverse();
54 }
55
Finalize()56 void CBC_PDF417HighLevelEncoder::Finalize() {}
57
encodeHighLevel(WideString wideMsg,Compaction compaction,int32_t & e)58 WideString CBC_PDF417HighLevelEncoder::encodeHighLevel(WideString wideMsg,
59 Compaction compaction,
60 int32_t& e) {
61 ByteString bytes;
62 CBC_UtilCodingConvert::UnicodeToUTF8(wideMsg, bytes);
63 WideString msg;
64 int32_t len = bytes.GetLength();
65 for (int32_t i = 0; i < len; i++) {
66 wchar_t ch = (wchar_t)(bytes[i] & 0xff);
67 if (ch == '?' && bytes[i] != '?') {
68 e = BCExceptionCharactersOutsideISO88591Encoding;
69 return WideString();
70 }
71 msg += ch;
72 }
73 std::vector<uint8_t> byteArr(bytes.begin(), bytes.end());
74 WideString sb;
75 len = msg.GetLength();
76 int32_t p = 0;
77 int32_t textSubMode = SUBMODE_ALPHA;
78 if (compaction == TEXT) {
79 encodeText(msg, p, len, sb, textSubMode);
80 } else if (compaction == BYTES) {
81 encodeBinary(&byteArr, p, byteArr.size(), BYTE_COMPACTION, sb);
82 } else if (compaction == NUMERIC) {
83 sb += (wchar_t)LATCH_TO_NUMERIC;
84 encodeNumeric(msg, p, len, sb);
85 } else {
86 int32_t encodingMode = LATCH_TO_TEXT;
87 while (p < len) {
88 int32_t n = determineConsecutiveDigitCount(msg, p);
89 if (n >= 13) {
90 sb += (wchar_t)LATCH_TO_NUMERIC;
91 encodingMode = NUMERIC_COMPACTION;
92 textSubMode = SUBMODE_ALPHA;
93 encodeNumeric(msg, p, n, sb);
94 p += n;
95 } else {
96 int32_t t = determineConsecutiveTextCount(msg, p);
97 if (t >= 5 || n == len) {
98 if (encodingMode != TEXT_COMPACTION) {
99 sb += (wchar_t)LATCH_TO_TEXT;
100 encodingMode = TEXT_COMPACTION;
101 textSubMode = SUBMODE_ALPHA;
102 }
103 textSubMode = encodeText(msg, p, t, sb, textSubMode);
104 p += t;
105 } else {
106 int32_t b = determineConsecutiveBinaryCount(msg, &byteArr, p, e);
107 if (e != BCExceptionNO)
108 return L" ";
109 if (b == 0) {
110 b = 1;
111 }
112 if (b == 1 && encodingMode == TEXT_COMPACTION) {
113 encodeBinary(&byteArr, p, 1, TEXT_COMPACTION, sb);
114 } else {
115 encodeBinary(&byteArr, p, b, encodingMode, sb);
116 encodingMode = BYTE_COMPACTION;
117 textSubMode = SUBMODE_ALPHA;
118 }
119 p += b;
120 }
121 }
122 }
123 }
124 return sb;
125 }
126
Inverse()127 void CBC_PDF417HighLevelEncoder::Inverse() {
128 for (size_t l = 0; l < FX_ArraySize(MIXED); ++l)
129 MIXED[l] = -1;
130
131 for (uint8_t i = 0; i < FX_ArraySize(TEXT_MIXED_RAW); ++i) {
132 uint8_t b = TEXT_MIXED_RAW[i];
133 if (b != 0)
134 MIXED[b] = i;
135 }
136
137 for (size_t l = 0; l < FX_ArraySize(PUNCTUATION); ++l)
138 PUNCTUATION[l] = -1;
139
140 for (uint8_t i = 0; i < FX_ArraySize(TEXT_PUNCTUATION_RAW); ++i) {
141 uint8_t b = TEXT_PUNCTUATION_RAW[i];
142 if (b != 0)
143 PUNCTUATION[b] = i;
144 }
145 }
146
encodeText(WideString msg,int32_t startpos,int32_t count,WideString & sb,int32_t initialSubmode)147 int32_t CBC_PDF417HighLevelEncoder::encodeText(WideString msg,
148 int32_t startpos,
149 int32_t count,
150 WideString& sb,
151 int32_t initialSubmode) {
152 WideString tmp;
153 int32_t submode = initialSubmode;
154 int32_t idx = 0;
155 while (true) {
156 wchar_t ch = msg[startpos + idx];
157 switch (submode) {
158 case SUBMODE_ALPHA:
159 if (isAlphaUpper(ch)) {
160 if (ch == ' ')
161 tmp += (wchar_t)26;
162 else
163 tmp += (wchar_t)(ch - 65);
164 break;
165 }
166 if (isAlphaLower(ch)) {
167 submode = SUBMODE_LOWER;
168 tmp += (wchar_t)27;
169 continue;
170 }
171 if (isMixed(ch)) {
172 submode = SUBMODE_MIXED;
173 tmp += (wchar_t)28;
174 continue;
175 }
176 tmp += (wchar_t)29;
177 tmp += PUNCTUATION[ch];
178 break;
179 case SUBMODE_LOWER:
180 if (isAlphaLower(ch)) {
181 if (ch == ' ') {
182 tmp += (wchar_t)26;
183 } else {
184 tmp += (wchar_t)(ch - 97);
185 }
186 break;
187 }
188 if (isAlphaUpper(ch)) {
189 tmp += (wchar_t)27;
190 tmp += (wchar_t)(ch - 65);
191 break;
192 }
193 if (isMixed(ch)) {
194 submode = SUBMODE_MIXED;
195 tmp += (wchar_t)28;
196 continue;
197 }
198
199 tmp += (wchar_t)29;
200 tmp += PUNCTUATION[ch];
201 break;
202 case SUBMODE_MIXED:
203 if (isMixed(ch)) {
204 tmp += MIXED[ch];
205 break;
206 }
207 if (isAlphaUpper(ch)) {
208 submode = SUBMODE_ALPHA;
209 tmp += (wchar_t)28;
210 continue;
211 }
212 if (isAlphaLower(ch)) {
213 submode = SUBMODE_LOWER;
214 tmp += (wchar_t)27;
215 continue;
216 }
217 if (startpos + idx + 1 < count) {
218 wchar_t next = msg[startpos + idx + 1];
219 if (isPunctuation(next)) {
220 submode = SUBMODE_PUNCTUATION;
221 tmp += (wchar_t)25;
222 continue;
223 }
224 }
225 tmp += (wchar_t)29;
226 tmp += PUNCTUATION[ch];
227 break;
228 default:
229 if (isPunctuation(ch)) {
230 tmp += PUNCTUATION[ch];
231 break;
232 }
233 submode = SUBMODE_ALPHA;
234 tmp += (wchar_t)29;
235 continue;
236 }
237 idx++;
238 if (idx >= count) {
239 break;
240 }
241 }
242 wchar_t h = 0;
243 int32_t len = tmp.GetLength();
244 for (int32_t i = 0; i < len; i++) {
245 bool odd = (i % 2) != 0;
246 if (odd) {
247 h = (wchar_t)((h * 30) + tmp[i]);
248 sb += h;
249 } else {
250 h = tmp[i];
251 }
252 }
253 if ((len % 2) != 0) {
254 sb += (wchar_t)((h * 30) + 29);
255 }
256 return submode;
257 }
encodeBinary(std::vector<uint8_t> * bytes,int32_t startpos,int32_t count,int32_t startmode,WideString & sb)258 void CBC_PDF417HighLevelEncoder::encodeBinary(std::vector<uint8_t>* bytes,
259 int32_t startpos,
260 int32_t count,
261 int32_t startmode,
262 WideString& sb) {
263 if (count == 1 && startmode == TEXT_COMPACTION) {
264 sb += (wchar_t)SHIFT_TO_BYTE;
265 }
266 int32_t idx = startpos;
267 int32_t i = 0;
268 if (count >= 6) {
269 sb += (wchar_t)LATCH_TO_BYTE;
270 wchar_t chars[5];
271 while ((startpos + count - idx) >= 6) {
272 int64_t t = 0;
273 for (i = 0; i < 6; i++) {
274 t <<= 8;
275 t += (*bytes)[idx + i] & 0xff;
276 }
277 for (i = 0; i < 5; i++) {
278 chars[i] = (wchar_t)(t % 900);
279 t /= 900;
280 }
281 for (i = 4; i >= 0; i--) {
282 sb += (chars[i]);
283 }
284 idx += 6;
285 }
286 }
287 if (idx < startpos + count) {
288 sb += (wchar_t)LATCH_TO_BYTE_PADDED;
289 }
290 for (i = idx; i < startpos + count; i++) {
291 int32_t ch = (*bytes)[i] & 0xff;
292 sb += (wchar_t)ch;
293 }
294 }
encodeNumeric(WideString msg,int32_t startpos,int32_t count,WideString & sb)295 void CBC_PDF417HighLevelEncoder::encodeNumeric(WideString msg,
296 int32_t startpos,
297 int32_t count,
298 WideString& sb) {
299 int32_t idx = 0;
300 BigInteger num900 = 900;
301 while (idx < count) {
302 WideString tmp;
303 int32_t len = 44 < count - idx ? 44 : count - idx;
304 ByteString part =
305 ((wchar_t)'1' + msg.Mid(startpos + idx, len)).UTF8Encode();
306 BigInteger bigint = stringToBigInteger(part.c_str());
307 do {
308 int32_t c = (bigint % num900).toInt();
309 tmp += (wchar_t)(c);
310 bigint = bigint / num900;
311 } while (!bigint.isZero());
312 for (int32_t i = tmp.GetLength() - 1; i >= 0; i--) {
313 sb += tmp[i];
314 }
315 idx += len;
316 }
317 }
isDigit(wchar_t ch)318 bool CBC_PDF417HighLevelEncoder::isDigit(wchar_t ch) {
319 return ch >= '0' && ch <= '9';
320 }
isAlphaUpper(wchar_t ch)321 bool CBC_PDF417HighLevelEncoder::isAlphaUpper(wchar_t ch) {
322 return ch == ' ' || (ch >= 'A' && ch <= 'Z');
323 }
isAlphaLower(wchar_t ch)324 bool CBC_PDF417HighLevelEncoder::isAlphaLower(wchar_t ch) {
325 return ch == ' ' || (ch >= 'a' && ch <= 'z');
326 }
isMixed(wchar_t ch)327 bool CBC_PDF417HighLevelEncoder::isMixed(wchar_t ch) {
328 return MIXED[ch] != -1;
329 }
isPunctuation(wchar_t ch)330 bool CBC_PDF417HighLevelEncoder::isPunctuation(wchar_t ch) {
331 return PUNCTUATION[ch] != -1;
332 }
isText(wchar_t ch)333 bool CBC_PDF417HighLevelEncoder::isText(wchar_t ch) {
334 return ch == '\t' || ch == '\n' || ch == '\r' || (ch >= 32 && ch <= 126);
335 }
determineConsecutiveDigitCount(WideString msg,int32_t startpos)336 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveDigitCount(
337 WideString msg,
338 int32_t startpos) {
339 int32_t count = 0;
340 int32_t len = msg.GetLength();
341 int32_t idx = startpos;
342 if (idx < len) {
343 wchar_t ch = msg[idx];
344 while (isDigit(ch) && idx < len) {
345 count++;
346 idx++;
347 if (idx < len) {
348 ch = msg[idx];
349 }
350 }
351 }
352 return count;
353 }
determineConsecutiveTextCount(WideString msg,int32_t startpos)354 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveTextCount(
355 WideString msg,
356 int32_t startpos) {
357 int32_t len = msg.GetLength();
358 int32_t idx = startpos;
359 while (idx < len) {
360 wchar_t ch = msg[idx];
361 int32_t numericCount = 0;
362 while (numericCount < 13 && isDigit(ch) && idx < len) {
363 numericCount++;
364 idx++;
365 if (idx < len) {
366 ch = msg[idx];
367 }
368 }
369 if (numericCount >= 13) {
370 return idx - startpos - numericCount;
371 }
372 if (numericCount > 0) {
373 continue;
374 }
375 ch = msg[idx];
376 if (!isText(ch)) {
377 break;
378 }
379 idx++;
380 }
381 return idx - startpos;
382 }
determineConsecutiveBinaryCount(WideString msg,std::vector<uint8_t> * bytes,int32_t startpos,int32_t & e)383 int32_t CBC_PDF417HighLevelEncoder::determineConsecutiveBinaryCount(
384 WideString msg,
385 std::vector<uint8_t>* bytes,
386 int32_t startpos,
387 int32_t& e) {
388 int32_t len = msg.GetLength();
389 int32_t idx = startpos;
390 while (idx < len) {
391 wchar_t ch = msg[idx];
392 int32_t numericCount = 0;
393 while (numericCount < 13 && isDigit(ch)) {
394 numericCount++;
395 int32_t i = idx + numericCount;
396 if (i >= len) {
397 break;
398 }
399 ch = msg[i];
400 }
401 if (numericCount >= 13) {
402 return idx - startpos;
403 }
404 int32_t textCount = 0;
405 while (textCount < 5 && isText(ch)) {
406 textCount++;
407 int32_t i = idx + textCount;
408 if (i >= len) {
409 break;
410 }
411 ch = msg[i];
412 }
413 if (textCount >= 5) {
414 return idx - startpos;
415 }
416 ch = msg[idx];
417 if ((*bytes)[idx] == 63 && ch != '?') {
418 e = BCExceptionNonEncodableCharacterDetected;
419 return -1;
420 }
421 idx++;
422 }
423 return idx - startpos;
424 }
425