• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 // Original code is licensed as follows:
7 /*
8  * Copyright 2006-2007 Jeremias Maerki.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include "fxbarcode/datamatrix/BC_HighLevelEncoder.h"
24 
25 #include <algorithm>
26 #include <array>
27 #include <limits>
28 #include <memory>
29 #include <vector>
30 
31 #include "core/fxcrt/check.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "fxbarcode/datamatrix/BC_ASCIIEncoder.h"
34 #include "fxbarcode/datamatrix/BC_Base256Encoder.h"
35 #include "fxbarcode/datamatrix/BC_C40Encoder.h"
36 #include "fxbarcode/datamatrix/BC_EdifactEncoder.h"
37 #include "fxbarcode/datamatrix/BC_Encoder.h"
38 #include "fxbarcode/datamatrix/BC_EncoderContext.h"
39 #include "fxbarcode/datamatrix/BC_SymbolInfo.h"
40 #include "fxbarcode/datamatrix/BC_TextEncoder.h"
41 #include "fxbarcode/datamatrix/BC_X12Encoder.h"
42 
43 namespace {
44 
// Codeword appended to fill unused symbol capacity.
constexpr wchar_t kPad = 129;

// Codewords written in place of a recognised macro 05 / macro 06 header.
constexpr wchar_t kMacro05 = 236;
constexpr wchar_t kMacro06 = 237;

// Six-character message prefixes that select the macro codewords above. The
// literals are split so the digits are not read as part of the octal escape.
constexpr wchar_t kMacro05Header[] =
    L"[)>\036"
    L"05";
constexpr wchar_t kMacro06Header[] =
    L"[)>\036"
    L"06";

// Final message character that must be present for macro handling to apply.
constexpr wchar_t kMacroTrailer = 0x0004;
55 
56 constexpr size_t kEncoderCount =
57     static_cast<size_t>(CBC_HighLevelEncoder::Encoding::LAST) + 1;
58 static_assert(kEncoderCount == 6, "Bad encoder count");
59 
// Offsets `ch` by a pseudo-random amount in [1, 253] derived from
// `codewordPosition`, wrapping the sum back by 254 when it exceeds 254.
wchar_t Randomize253State(wchar_t ch, int32_t codewordPosition) {
  const int32_t offset = 1 + ((149 * codewordPosition) % 253);
  int32_t randomized = ch + offset;
  if (randomized > 254)
    randomized -= 254;
  return static_cast<wchar_t>(randomized);
}
66 
FindMinimums(const std::array<float,kEncoderCount> & charCounts,std::array<int32_t,kEncoderCount> * intCharCounts,std::array<uint8_t,kEncoderCount> * mins)67 int32_t FindMinimums(const std::array<float, kEncoderCount>& charCounts,
68                      std::array<int32_t, kEncoderCount>* intCharCounts,
69                      std::array<uint8_t, kEncoderCount>* mins) {
70   int32_t min = std::numeric_limits<int32_t>::max();
71   for (size_t i = 0; i < kEncoderCount; ++i) {
72     int32_t current = static_cast<int32_t>(ceil(charCounts[i]));
73     (*intCharCounts)[i] = current;
74     if (min > current) {
75       min = current;
76       for (auto& m : *mins)
77         m = 0;
78     }
79     if (min == current)
80       (*mins)[i]++;
81   }
82   return min;
83 }
84 
GetMinimumCount(const std::array<uint8_t,kEncoderCount> & mins)85 int32_t GetMinimumCount(const std::array<uint8_t, kEncoderCount>& mins) {
86   int32_t count = 0;
87   for (const auto& m : mins)
88     count += m;
89   return count;
90 }
91 
IsNativeC40(wchar_t ch)92 bool IsNativeC40(wchar_t ch) {
93   return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsUpperASCII(ch);
94 }
95 
IsNativeText(wchar_t ch)96 bool IsNativeText(wchar_t ch) {
97   return (ch == ' ') || (ch >= '0' && ch <= '9') || FXSYS_IsLowerASCII(ch);
98 }
99 
// True for carriage return, '*' or '>'.
bool IsX12TermSep(wchar_t ch) {
  switch (ch) {
    case '\r':
    case '*':
    case '>':
      return true;
    default:
      return false;
  }
}
103 
IsNativeX12(wchar_t ch)104 bool IsNativeX12(wchar_t ch) {
105   return IsX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') ||
106          FXSYS_IsUpperASCII(ch);
107 }
108 
// True when `ch` lies in the inclusive range from ' ' to '^'.
bool IsNativeEDIFACT(wchar_t ch) {
  const bool below_range = ch < ' ';
  const bool above_range = ch > '^';
  return !(below_range || above_range);
}
112 
EncoderIndex(CBC_HighLevelEncoder::Encoding encoding)113 size_t EncoderIndex(CBC_HighLevelEncoder::Encoding encoding) {
114   DCHECK(encoding != CBC_HighLevelEncoder::Encoding::UNKNOWN);
115   return static_cast<size_t>(encoding);
116 }
117 
118 }  // namespace
119 
120 // static
EncodeHighLevel(const WideString & msg)121 WideString CBC_HighLevelEncoder::EncodeHighLevel(const WideString& msg) {
122   // Per spec. Alpha numeric input is even shorter.
123   static constexpr size_t kMaxNumericInputLength = 3116;
124 
125   // Exit early if the input is too long. It will fail no matter what.
126   if (msg.GetLength() > kMaxNumericInputLength)
127     return WideString();
128 
129   CBC_EncoderContext context(msg);
130   if (context.HasCharactersOutsideISO88591Encoding())
131     return WideString();
132 
133   if (msg.Back() == kMacroTrailer) {
134     WideString left = msg.First(6);
135     if (left == kMacro05Header) {
136       context.writeCodeword(kMacro05);
137       context.setSkipAtEnd(2);
138       context.m_pos += 6;
139     } else if (left == kMacro06Header) {
140       context.writeCodeword(kMacro06);
141       context.setSkipAtEnd(2);
142       context.m_pos += 6;
143     }
144   }
145 
146   std::vector<std::unique_ptr<CBC_Encoder>> encoders;
147   encoders.push_back(std::make_unique<CBC_ASCIIEncoder>());
148   encoders.push_back(std::make_unique<CBC_C40Encoder>());
149   encoders.push_back(std::make_unique<CBC_TextEncoder>());
150   encoders.push_back(std::make_unique<CBC_X12Encoder>());
151   encoders.push_back(std::make_unique<CBC_EdifactEncoder>());
152   encoders.push_back(std::make_unique<CBC_Base256Encoder>());
153   Encoding encodingMode = Encoding::ASCII;
154   while (context.hasMoreCharacters()) {
155     if (!encoders[EncoderIndex(encodingMode)]->Encode(&context))
156       return WideString();
157 
158     if (context.m_newEncoding != Encoding::UNKNOWN) {
159       encodingMode = context.m_newEncoding;
160       context.ResetEncoderSignal();
161     }
162   }
163   size_t len = context.m_codewords.GetLength();
164   if (!context.UpdateSymbolInfo())
165     return WideString();
166 
167   size_t capacity = context.m_symbolInfo->data_capacity();
168   if (len < capacity) {
169     if (encodingMode != Encoding::ASCII && encodingMode != Encoding::BASE256)
170       context.writeCodeword(0x00fe);
171   }
172   WideString codewords = context.m_codewords;
173   if (codewords.GetLength() < capacity)
174     codewords += kPad;
175 
176   while (codewords.GetLength() < capacity)
177     codewords += Randomize253State(kPad, codewords.GetLength() + 1);
178 
179   DCHECK(!codewords.IsEmpty());
180   return codewords;
181 }
182 
183 // static
LookAheadTest(const WideString & msg,size_t startpos,CBC_HighLevelEncoder::Encoding currentMode)184 CBC_HighLevelEncoder::Encoding CBC_HighLevelEncoder::LookAheadTest(
185     const WideString& msg,
186     size_t startpos,
187     CBC_HighLevelEncoder::Encoding currentMode) {
188   if (startpos >= msg.GetLength())
189     return currentMode;
190 
191   std::array<float, kEncoderCount> charCounts;
192   if (currentMode == Encoding::ASCII) {
193     charCounts = {0, 1, 1, 1, 1, 1.25f};
194   } else {
195     charCounts = {1, 2, 2, 2, 2, 2.25f};
196     charCounts[EncoderIndex(currentMode)] = 0;
197   }
198 
199   size_t charsProcessed = 0;
200   while (true) {
201     if ((startpos + charsProcessed) == msg.GetLength()) {
202       std::array<int32_t, kEncoderCount> intCharCounts;
203       std::array<uint8_t, kEncoderCount> mins;
204       int32_t min = FindMinimums(charCounts, &intCharCounts, &mins);
205       if (intCharCounts[EncoderIndex(Encoding::ASCII)] == min)
206         return Encoding::ASCII;
207       const int32_t minCount = GetMinimumCount(mins);
208       if (minCount == 1) {
209         if (mins[EncoderIndex(Encoding::BASE256)] > 0)
210           return Encoding::BASE256;
211         if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
212           return Encoding::EDIFACT;
213         if (mins[EncoderIndex(Encoding::TEXT)] > 0)
214           return Encoding::TEXT;
215         if (mins[EncoderIndex(Encoding::X12)] > 0)
216           return Encoding::X12;
217       }
218       return Encoding::C40;
219     }
220 
221     wchar_t c = msg[startpos + charsProcessed];
222     charsProcessed++;
223     {
224       auto& count = charCounts[EncoderIndex(Encoding::ASCII)];
225       if (FXSYS_IsDecimalDigit(c))
226         count += 0.5;
227       else if (IsExtendedASCII(c))
228         count = ceilf(count) + 2;
229       else
230         count = ceilf(count) + 1;
231     }
232 
233     {
234       auto& count = charCounts[EncoderIndex(Encoding::C40)];
235       if (IsNativeC40(c))
236         count += 2.0f / 3.0f;
237       else if (IsExtendedASCII(c))
238         count += 8.0f / 3.0f;
239       else
240         count += 4.0f / 3.0f;
241     }
242 
243     {
244       auto& count = charCounts[EncoderIndex(Encoding::TEXT)];
245       if (IsNativeText(c))
246         count += 2.0f / 3.0f;
247       else if (IsExtendedASCII(c))
248         count += 8.0f / 3.0f;
249       else
250         count += 4.0f / 3.0f;
251     }
252 
253     {
254       auto& count = charCounts[EncoderIndex(Encoding::X12)];
255       if (IsNativeX12(c))
256         count += 2.0f / 3.0f;
257       else if (IsExtendedASCII(c))
258         count += 13.0f / 3.0f;
259       else
260         count += 10.0f / 3.0f;
261     }
262 
263     {
264       auto& count = charCounts[EncoderIndex(Encoding::EDIFACT)];
265       if (IsNativeEDIFACT(c))
266         count += 3.0f / 4.0f;
267       else if (IsExtendedASCII(c))
268         count += 17.0f / 4.0f;
269       else
270         count += 13.0f / 4.0f;
271     }
272 
273     charCounts[EncoderIndex(Encoding::BASE256)]++;
274     if (charsProcessed < 4)
275       continue;
276 
277     std::array<int32_t, kEncoderCount> intCharCounts;
278     std::array<uint8_t, kEncoderCount> mins;
279     FindMinimums(charCounts, &intCharCounts, &mins);
280     int32_t minCount = GetMinimumCount(mins);
281     int32_t ascii_count = intCharCounts[EncoderIndex(Encoding::ASCII)];
282     int32_t c40_count = intCharCounts[EncoderIndex(Encoding::C40)];
283     int32_t text_count = intCharCounts[EncoderIndex(Encoding::TEXT)];
284     int32_t x12_count = intCharCounts[EncoderIndex(Encoding::X12)];
285     int32_t editfact_count = intCharCounts[EncoderIndex(Encoding::EDIFACT)];
286     int32_t base256_count = intCharCounts[EncoderIndex(Encoding::BASE256)];
287     int32_t bet_min = std::min({base256_count, editfact_count, text_count});
288     if (ascii_count < bet_min && ascii_count < c40_count &&
289         ascii_count < x12_count) {
290       return Encoding::ASCII;
291     }
292     if (base256_count < ascii_count ||
293         (mins[EncoderIndex(Encoding::C40)] +
294          mins[EncoderIndex(Encoding::TEXT)] +
295          mins[EncoderIndex(Encoding::X12)] +
296          mins[EncoderIndex(Encoding::EDIFACT)]) == 0) {
297       return Encoding::BASE256;
298     }
299     if (minCount == 1) {
300       if (mins[EncoderIndex(Encoding::EDIFACT)] > 0)
301         return Encoding::EDIFACT;
302       if (mins[EncoderIndex(Encoding::TEXT)] > 0)
303         return Encoding::TEXT;
304       if (mins[EncoderIndex(Encoding::X12)] > 0)
305         return Encoding::X12;
306     }
307     if (c40_count + 1 < ascii_count && c40_count + 1 < bet_min) {
308       if (c40_count < x12_count)
309         return Encoding::C40;
310       if (c40_count == x12_count) {
311         size_t p = startpos + charsProcessed + 1;
312         while (p < msg.GetLength()) {
313           wchar_t tc = msg[p];
314           if (IsX12TermSep(tc))
315             return Encoding::X12;
316           if (!IsNativeX12(tc))
317             break;
318           p++;
319         }
320         return Encoding::C40;
321       }
322     }
323   }
324 }
325 
326 // static
IsExtendedASCII(wchar_t ch)327 bool CBC_HighLevelEncoder::IsExtendedASCII(wchar_t ch) {
328   return ch >= 128 && ch <= 255;
329 }
330