• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2006-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.charset;
11 
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.nio.Buffer;
15 import java.nio.BufferOverflowException;
16 import java.nio.ByteBuffer;
17 import java.nio.CharBuffer;
18 import java.nio.IntBuffer;
19 import java.nio.charset.CharsetDecoder;
20 import java.nio.charset.CharsetEncoder;
21 import java.nio.charset.CoderResult;
22 import java.util.Locale;
23 
24 import com.ibm.icu.charset.UConverterSharedData.UConverterType;
25 import com.ibm.icu.impl.ICUBinary;
26 import com.ibm.icu.impl.ICUData;
27 import com.ibm.icu.impl.InvalidFormatException;
28 import com.ibm.icu.lang.UCharacter;
29 import com.ibm.icu.text.UTF16;
30 import com.ibm.icu.text.UnicodeSet;
31 
32 class CharsetMBCS extends CharsetICU {
33 
34     private byte[] fromUSubstitution = null;
35     UConverterSharedData sharedData = null;
36     private static final int MAX_VERSION_LENGTH = 4;
37 
38     // these variables are used in getUnicodeSet() and may be changed in future
39     // typedef enum UConverterSetFilter {
40       static final int UCNV_SET_FILTER_NONE = 1;
41       static final int UCNV_SET_FILTER_DBCS_ONLY = 2;
42       static final int UCNV_SET_FILTER_2022_CN = 3;
43       static final int UCNV_SET_FILTER_SJIS= 4 ;
44       static final int UCNV_SET_FILTER_GR94DBCS = 5;
45       static final int UCNV_SET_FILTER_HZ = 6;
46       static final int UCNV_SET_FILTER_COUNT = 7;
47    //  } UConverterSetFilter;
48 
49     /**
50      * Fallbacks to Unicode are stored outside the normal state table and code point structures in a vector of items of
51      * this type. They are sorted by offset.
52      */
53     final static class MBCSToUFallback {
54         int offset;
55         int codePoint;
56 
MBCSToUFallback(int off, int cp)57         MBCSToUFallback(int off, int cp) {
58             offset = off;
59             codePoint = cp;
60         }
61     }
62 
63     /**
64      * This is the MBCS part of the UConverterTable union (a runtime data structure). It keeps all the per-converter
65      * data and points into the loaded mapping tables.
66      */
67     static final class UConverterMBCSTable {
68         /* toUnicode */
69         short countStates;
70         byte dbcsOnlyState;
71         boolean stateTableOwned;
72         int countToUFallbacks;
73 
74         int stateTable[/* countStates */][/* 256 */];
75         int swapLFNLStateTable[/* countStates */][/* 256 */]; /* for swaplfnl */
76         char unicodeCodeUnits[/* countUnicodeResults */];
77         MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
78 
79         /* fromUnicode */
80         char fromUnicodeTable[];  // stage1, and for MBCS_OUTPUT_1 also contains stage2
81         int fromUnicodeTableInts[];  // stage1 and stage2 together as int[]
82         // Exactly one of the fromUnicode(Type) tables is not null,
83         // depending on the outputType.
84         byte fromUnicodeBytes[];
85         char fromUnicodeChars[];
86         int fromUnicodeInts[];
87         char swapLFNLFromUnicodeChars[]; /* for swaplfnl */
88         int fromUBytesLength;
89         short outputType, unicodeMask;
90 
91         /* converter name for swaplfnl */
92         String swapLFNLName;
93 
94         /* extension data */
95         UConverterSharedData baseSharedData;
96         // int extIndexes[];
97         ByteBuffer extIndexes; // create int[] view etc. as needed
98 
99         CharBuffer mbcsIndex;                     /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
100         // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
101         boolean utf8Friendly;                     /* for utf8Friendly data */
102         char maxFastUChar;                        /* for utf8Friendly data */
103 
104         /* roundtrips */
105         int asciiRoundtrips;
106 
UConverterMBCSTable()107         UConverterMBCSTable() {
108             utf8Friendly = false;
109             mbcsIndex = null;
110         }
111 
hasSupplementary()112         boolean hasSupplementary() {
113             return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;
114         }
115 
116         /*
117          * UConverterMBCSTable(UConverterMBCSTable t) { countStates = t.countStates; dbcsOnlyState = t.dbcsOnlyState;
118          * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
119          * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
120          * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
121          * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType =
122          * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
123          * extIndexes = t.extIndexes; }
124          */
125     }
126 
127     /* Constants used in MBCS data header */
128     // enum {
129         static final int MBCS_OPT_LENGTH_MASK=0x3f;
130         static final int MBCS_OPT_NO_FROM_U=0x40;
131         /*
132          * If any of the following options bits are set,
133          * then the file must be rejected.
134          */
135         static final int MBCS_OPT_INCOMPATIBLE_MASK=0xffc0;
136         /*
137          * Remove bits from this mask as more options are recognized
138          * by all implementations that use this constant.
139          */
140         static final int MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80;
141     // };
142     /* Constants for fast and UTF-8-friendly conversion. */
143     // enum {
144         static final int SBCS_FAST_MAX=0x0fff;               /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */
145         static final int SBCS_FAST_LIMIT=SBCS_FAST_MAX+1;    /* =0x1000 */
146         static final int MBCS_FAST_MAX=0xd7ff;               /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */
147         static final int MBCS_FAST_LIMIT=MBCS_FAST_MAX+1;    /* =0xd800 */
148     // };
149     /**
150      * MBCS data header. See data format description above.
151      */
152     final static class MBCSHeader {
153         byte version[/* U_MAX_VERSION_LENGTH */];
154         int countStates, countToUFallbacks, offsetToUCodeUnits, offsetFromUTable, offsetFromUBytes;
155         int flags;
156         int fromUBytesLength;
157 
158         /* new and required in version 5 */
159         int options;
160 
161         /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */
162         int fullStage2Length;  /* number of 32-bit units */
163 
MBCSHeader()164         MBCSHeader() {
165             version = new byte[MAX_VERSION_LENGTH];
166         }
167     }
168 
CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath, ClassLoader loader)169     public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases, String classPath,
170             ClassLoader loader) throws InvalidFormatException {
171         super(icuCanonicalName, javaCanonicalName, aliases);
172 
173         /* See if the icuCanonicalName contains certain option information. */
174         if (icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
175             options = UConverterConstants.OPTION_SWAP_LFNL;
176             icuCanonicalName = icuCanonicalName.substring(0, icuCanonicalName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
177             super.icuCanonicalName = icuCanonicalName;
178         }
179 
180         // now try to load the data
181         sharedData = loadConverter(1, icuCanonicalName, classPath, loader);
182 
183         maxBytesPerChar = sharedData.staticData.maxBytesPerChar;
184         minBytesPerChar = sharedData.staticData.minBytesPerChar;
185         maxCharsPerByte = 1;
186         fromUSubstitution = sharedData.staticData.subChar;
187         subChar = sharedData.staticData.subChar;
188         subCharLen = sharedData.staticData.subCharLen;
189         subChar1 = sharedData.staticData.subChar1;
190         fromUSubstitution = new byte[sharedData.staticData.subCharLen];
191         System.arraycopy(sharedData.staticData.subChar, 0, fromUSubstitution, 0, sharedData.staticData.subCharLen);
192 
193         initializeConverter(options);
194     }
195 
/**
 * Creates an MBCS charset whose data is looked up under ICUData.ICU_BUNDLE with no explicit class loader.
 *
 * @throws InvalidFormatException if the five-argument constructor throws it
 */
public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases)
        throws InvalidFormatException {
    // Delegate with the default data location and a null loader.
    this(icuCanonicalName, javaCanonicalName, aliases, ICUData.ICU_BUNDLE, null);
}
200 
loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)201     private UConverterSharedData loadConverter(int nestedLoads, String myName, String classPath, ClassLoader loader)
202             throws InvalidFormatException {
203         boolean noFromU = false;
204         // Read converter data from file
205         UConverterStaticData staticData = new UConverterStaticData();
206         UConverterDataReader reader = null;
207         try {
208             String itemName = myName + '.' + UConverterSharedData.DATA_TYPE;
209             String resourceName = classPath + '/' + itemName;
210             ByteBuffer b;
211 
212             if (loader != null) {
213                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
214                 InputStream i = ICUData.getRequiredStream(loader, resourceName);
215                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
216             } else if (!classPath.equals(ICUData.ICU_BUNDLE)) {
217                 @SuppressWarnings("resource")  // Closed by getByteBufferFromInputStreamAndCloseStream().
218                 InputStream i = ICUData.getRequiredStream(resourceName);
219                 b = ICUBinary.getByteBufferFromInputStreamAndCloseStream(i);
220             } else {
221                 b = ICUBinary.getRequiredData(itemName);
222             }
223             reader = new UConverterDataReader(b);
224             reader.readStaticData(staticData);
225         } catch (IOException e) {
226             throw new InvalidFormatException(e);
227         } catch (Exception e) {
228             throw new InvalidFormatException(e);
229         }
230 
231         int type = staticData.conversionType;
232 
233         if (type != UConverterSharedData.UConverterType.MBCS
234                 || staticData.structSize != UConverterStaticData.SIZE_OF_UCONVERTER_STATIC_DATA) {
235             throw new InvalidFormatException();
236         }
237 
238         UConverterSharedData data = new UConverterSharedData(staticData);
239 
240         // Load data
241         UConverterMBCSTable mbcsTable = data.mbcs;
242         MBCSHeader header = new MBCSHeader();
243         try {
244             reader.readMBCSHeader(header);
245         } catch (IOException e) {
246             throw new InvalidFormatException();
247         }
248 
249         int offset;
250         // int[] extIndexesArray = null;
251         String baseNameString = null;
252 
253         if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
254             noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
255         } else if (header.version[0] != 4) {
256             throw new InvalidFormatException();
257         }
258 
259         mbcsTable.outputType = (byte) header.flags;
260 
261         /* extension data, header version 4.2 and higher */
262         offset = header.flags >>> 8;
263         // if(offset!=0 && mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
264         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
265             try {
266                 baseNameString = reader.readBaseTableName();
267                 if (offset != 0) {
268                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
269                     // terminator byte all already read;
270                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
271                 }
272             } catch (IOException e) {
273                 throw new InvalidFormatException();
274             }
275         }
276 
277         // agljport:add this would be unnecessary if extIndexes were memory mapped
278         /*
279          * if(mbcsTable.extIndexes != null) {
280          *
281          * try { //int nbytes = mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_LENGTH]*4 +
282          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_TO_U_UCHARS_LENGTH]*2 +
283          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_LENGTH]*6 +
284          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_BYTES_LENGTH] +
285          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_12_LENGTH]*2 +
286          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3_LENGTH]*2 +
287          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_FROM_U_STAGE_3B_LENGTH]*4; //int nbytes =
288          * mbcsTable.extIndexes[UConverterExt.UCNV_EXT_SIZE] //byte[] extTables = dataReader.readExtTables(nbytes);
289          * //mbcsTable.extTables = ByteBuffer.wrap(extTables); } catch(IOException e) { System.err.println("Caught
290          * IOException: " + e.getMessage()); pErrorCode[0] = UErrorCode.U_INVALID_FORMAT_ERROR; return; } }
291          */
292         if (mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
293             UConverterSharedData baseSharedData = null;
294             ByteBuffer extIndexes;
295             String baseName;
296 
297             /* extension-only file, load the base table and set values appropriately */
298             extIndexes = mbcsTable.extIndexes;
299             if (extIndexes == null) {
300                 /* extension-only file without extension */
301                 throw new InvalidFormatException();
302             }
303 
304             if (nestedLoads != 1) {
305                 /* an extension table must not be loaded as a base table */
306                 throw new InvalidFormatException();
307             }
308 
309             /* load the base table */
310             baseName = baseNameString;
311             if (baseName.equals(staticData.name)) {
312                 /* forbid loading this same extension-only file */
313                 throw new InvalidFormatException();
314             }
315 
316             // agljport:fix args.size=sizeof(UConverterLoadArgs);
317             baseSharedData = loadConverter(2, baseName, classPath, loader);
318 
319             if (baseSharedData.staticData.conversionType != UConverterType.MBCS
320                     || baseSharedData.mbcs.baseSharedData != null) {
321                 // agljport:fix ucnv_unload(baseSharedData);
322                 throw new InvalidFormatException();
323             }
324 
325             /* copy the base table data */
326             // agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't
327             // need the deep copy so can just make sure mbcs and its local reference both refer to the same new object
328             mbcsTable = data.mbcs = baseSharedData.mbcs;
329 
330             /* overwrite values with relevant ones for the extension converter */
331             mbcsTable.baseSharedData = baseSharedData;
332             mbcsTable.extIndexes = extIndexes;
333 
334             /*
335              * It would be possible to share the swapLFNL data with a base converter, but the generated name would have
336              * to be different, and the memory would have to be free'd only once. It is easier to just create the data
337              * for the extension converter separately when it is requested.
338              */
339             mbcsTable.swapLFNLStateTable = null;
340             mbcsTable.swapLFNLFromUnicodeChars = null;
341             mbcsTable.swapLFNLName = null;
342 
343             /*
344              * Set a special, runtime-only outputType if the extension converter is a DBCS version of a base converter
345              * that also maps single bytes.
346              */
347             if (staticData.conversionType == UConverterType.DBCS
348                     || (staticData.conversionType == UConverterType.MBCS && staticData.minBytesPerChar >= 2)) {
349 
350                 if (baseSharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
351                     /* the base converter is SI/SO-stateful */
352                     int entry;
353 
354                     /* get the dbcs state from the state table entry for SO=0x0e */
355                     entry = mbcsTable.stateTable[0][0xe];
356                     if (MBCS_ENTRY_IS_FINAL(entry) && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_CHANGE_ONLY
357                             && MBCS_ENTRY_FINAL_STATE(entry) != 0) {
358                         mbcsTable.dbcsOnlyState = (byte) MBCS_ENTRY_FINAL_STATE(entry);
359 
360                         mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
361                     }
362                 } else if (baseSharedData.staticData.conversionType == UConverterType.MBCS
363                         && baseSharedData.staticData.minBytesPerChar == 1
364                         && baseSharedData.staticData.maxBytesPerChar == 2 && mbcsTable.countStates <= 127) {
365 
366                     /* non-stateful base converter, need to modify the state table */
367                     int newStateTable[][/* 256 */];
368                     int state[]; // this works because java 2-D array is array of references and we can have state =
369                     // newStateTable[i];
370                     int i, count;
371 
372                     /* allocate a new state table and copy the base state table contents */
373                     count = mbcsTable.countStates;
374                     newStateTable = new int[(count + 1) * 1024][256];
375 
376                     for (i = 0; i < mbcsTable.stateTable.length; ++i)
377                         System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0,
378                                 mbcsTable.stateTable[i].length);
379 
380                     /* change all final single-byte entries to go to a new all-illegal state */
381                     state = newStateTable[0];
382                     for (i = 0; i < 256; ++i) {
383                         if (MBCS_ENTRY_IS_FINAL(state[i])) {
384                             state[i] = MBCS_ENTRY_TRANSITION(count, 0);
385                         }
386                     }
387 
388                     /* build the new all-illegal state */
389                     state = newStateTable[count];
390                     for (i = 0; i < 256; ++i) {
391                         state[i] = MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
392                     }
393                     mbcsTable.stateTable = newStateTable;
394                     mbcsTable.countStates = (byte) (count + 1);
395                     mbcsTable.stateTableOwned = true;
396 
397                     mbcsTable.outputType = MBCS_OUTPUT_DBCS_ONLY;
398                 }
399             }
400 
401             /*
402              * unlike below for files with base tables, do not get the unicodeMask from the sharedData; instead, use the
403              * base table's unicodeMask, which we copied in the memcpy above; this is necessary because the static data
404              * unicodeMask, especially the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
405              */
406         } else {
407             /* conversion file with a base table; an additional extension table is optional */
408             /* make sure that the output type is known */
409             switch (mbcsTable.outputType) {
410             case MBCS_OUTPUT_1:
411             case MBCS_OUTPUT_2:
412             case MBCS_OUTPUT_3:
413             case MBCS_OUTPUT_4:
414             case MBCS_OUTPUT_3_EUC:
415             case MBCS_OUTPUT_4_EUC:
416             case MBCS_OUTPUT_2_SISO:
417                 /* OK */
418                 break;
419             default:
420                 throw new InvalidFormatException();
421             }
422 
423             /*
424              * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
425              * function implementations
426              */
427             // agljport:fix info.size=sizeof(UDataInfo);
428             // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
429             if (reader.dataFormatHasUnicodeMask()) {
430                 /* mask off possible future extensions to be safe */
431                 mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
432             } else {
433                 /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
434                 mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES;
435             }
436             try {
437                 reader.readMBCSTable(header, mbcsTable);
438             } catch (IOException e) {
439                 throw new InvalidFormatException();
440             }
441 
442             if (offset != 0) {
443                 try {
444                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
445                     // terminator byte all already read;
446                     // int namelen = baseNameString != null? baseNameString.length() + 1: 0;
447                     mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
448                 } catch (IOException e) {
449                     throw new InvalidFormatException();
450                 }
451             }
452 
453             if (header.version[1] >= 3 && (mbcsTable.unicodeMask & UConverterConstants.HAS_SURROGATES) == 0 &&
454                     (mbcsTable.countStates == 1 ? ((char)header.version[2] >= (SBCS_FAST_MAX>>8)) : ((char)header.version[2] >= (MBCS_FAST_MAX>>8)))) {
455                 mbcsTable.utf8Friendly = true;
456 
457                 if (mbcsTable.countStates == 1) {
458                     /*
459                      * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
460                      * Build a table with indexes to each block, to be used instead of
461                      * the regular stage 1/2 table.
462                      */
463 //                    sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
464 //                    for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
465 //                        mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
466 //                    }
467                     /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
468                     mbcsTable.maxFastUChar = SBCS_FAST_MAX;
469                 } else {
470                     /*
471                      * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
472                      * The .cnv file is prebuilt with an additional stage table with indexes to each block.
473                      */
474                     mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
475                 }
476             }
477             /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
478             {
479                 int asciiRoundtrips = 0xffffffff;
480                 for (int i = 0; i < 0x80; ++i) {
481                     if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
482                         asciiRoundtrips &= ~(1 << (i >> 2));
483                     }
484                 }
485                 mbcsTable.asciiRoundtrips = asciiRoundtrips;
486             }
487             // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C.
488 
489             if (noFromU) {
490                 int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
491                 int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
492                 reconstituteData(mbcsTable, stage1Length, stage2Length, header.fullStage2Length);
493             }
494             if (mbcsTable.outputType == MBCS_OUTPUT_DBCS_ONLY || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) {
495                 /*
496                  * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
497                  * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
498                  */
499                 mbcsTable.asciiRoundtrips = 0;
500             }
501         }
502         // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C.
503         mbcsTable.mbcsIndex = null;
504         return data;
505     }
506 
writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[])507     private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
508         char[] table;
509         byte[] bytes;
510         int stage2;
511         int p;
512         int c;
513         int i, st3;
514 
515         table = mbcsTable.fromUnicodeTable;
516         int[] tableInts = mbcsTable.fromUnicodeTableInts;
517         bytes = mbcsTable.fromUnicodeBytes;
518         char[] chars = mbcsTable.fromUnicodeChars;
519         int[] ints = mbcsTable.fromUnicodeInts;
520 
521         /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
522         switch(mbcsTable.outputType) {
523         case MBCS_OUTPUT_3_EUC:
524             if(value<=0xffff) {
525                 /* short sequences are stored directly */
526                 /* code set 0 or 1 */
527             } else if(value<=0x8effff) {
528                 /* code set 2 */
529                 value&=0x7fff;
530             } else /* first byte is 0x8f */ {
531                 /* code set 3 */
532                 value&=0xff7f;
533             }
534             break;
535         case MBCS_OUTPUT_4_EUC:
536             if(value<=0xffffff) {
537                 /* short sequences are stored directly */
538                 /* code set 0 or 1 */
539             } else if(value<=0x8effffffL) {
540                 /* code set 2 */
541                 value&=0x7fffff;
542             } else /* first byte is 0x8f */ {
543                 /* code set 3 */
544                 value&=0xff7fff;
545             }
546             break;
547         default:
548             break;
549         }
550 
551         for(i=0; i<=0x1f; ++value, ++i) {
552             c=codePoints[i];
553             if(c<0) {
554                 continue;
555             }
556 
557             /* locate the stage 2 & 3 data */
558             stage2 = table[c>>10] + ((c>>4)&0x3f);
559             st3 = tableInts[stage2];
560             st3 = (char)(st3 * 16 + (c&0xf));
561 
562             /* write the codepage bytes into stage 3 */
563             switch(mbcsTable.outputType) {
564             case MBCS_OUTPUT_3:
565             case MBCS_OUTPUT_4_EUC:
566                 p = st3*3;
567                 bytes[p] = (byte)(value>>16);
568                 bytes[p+1] = (byte)(value>>8);
569                 bytes[p+2] = (byte)value;
570                 break;
571             case MBCS_OUTPUT_4:
572                 ints[st3] = (int)value;
573                 break;
574             default:
575                 /* 2 bytes per character */
576                 chars[st3] = (char)value;
577                 break;
578             }
579 
580             // Set the roundtrip flag.
581             int shift = 16 + (c & 0x0F);
582             tableInts[stage2] |= (1L << shift);
583         }
584         return true;
585      }
586 
reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length)587     private static void reconstituteData(UConverterMBCSTable mbcsTable,
588             int stage1Length, int stage2Length, int fullStage2Length) {
589         char[] stage1 = mbcsTable.fromUnicodeTable;
590 
591         // stage2 starts with unused stage1 space.
592         // Indexes into stage 2 count from the bottom of the fromUnicodeTable.
593         int numStage1Ints = stage1Length / 2;  // 2 chars = 1 int
594         int[] stage2 = new int[numStage1Ints + fullStage2Length];
595         System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints,
596                 stage2, (fullStage2Length - stage2Length) + numStage1Ints,
597                 stage2Length);
598         mbcsTable.fromUnicodeTableInts = stage2;
599 
600         /* reconstitute the initial part of stage 2 from the mbcsIndex */
601         {
602             int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
603             int stageUTF8Index=0;
604             int st1, st2, st3, i;
605 
606             for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
607                 st2 = stage1[st1];
608                 if (st2 != stage1Length/2) {
609                     /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
610                     for (i = 0; i < 16; ++i) {
611                         st3 = mbcsTable.mbcsIndex.get(stageUTF8Index++);
612                         if (st3 != 0) {
613                             /* a stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
614                             st3>>=4;
615                             /*
616                              * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
617                              * allocated together as a single 64-block for access from the mbcsIndex
618                              */
619                             stage2[st2++] = st3++;
620                             stage2[st2++] = st3++;
621                             stage2[st2++] = st3++;
622                             stage2[st2++] = st3;
623                         } else {
624                             /* no stage 3 block, skip */
625                             st2+=4;
626                         }
627                     }
628                 } else {
629                     /* no stage 2 block, skip */
630                     stageUTF8Index+=16;
631                 }
632             }
633         }
634 
635         switch (mbcsTable.outputType) {
636         case CharsetMBCS.MBCS_OUTPUT_2:
637         case CharsetMBCS.MBCS_OUTPUT_2_SISO:
638         case CharsetMBCS.MBCS_OUTPUT_3_EUC:
639             mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2];
640             break;
641         case CharsetMBCS.MBCS_OUTPUT_3:
642         case CharsetMBCS.MBCS_OUTPUT_4_EUC:
643             mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength];
644             break;
645         case CharsetMBCS.MBCS_OUTPUT_4:
646             mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4];
647             break;
648         default:
649             // Cannot occur, caller checked already.
650             assert false;
651         }
652 
653         /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
654         MBCSEnumToUnicode(mbcsTable);
655     }
656 
657     /*
658      * Internal function enumerating the toUnicode data of an MBCS converter.
659      * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
660      * table, but could also be used for a future getUnicodeSet() option
661      * that includes reverse fallbacks (after updating this function's implementation).
662      * Currently only handles roundtrip mappings.
663      * Does not currently handle extensions.
664      */
MBCSEnumToUnicode(UConverterMBCSTable mbcsTable)665     private static void MBCSEnumToUnicode(UConverterMBCSTable mbcsTable) {
666         /*
667          * Properties for each state, to speed up the enumeration.
668          * Ignorable actions are unassigned/illegal/state-change-only:
669          * They do not lead to mappings.
670          *
671          * Bits 7..6
672          * 1 direct/initial state (stateful converters have mulitple)
673          * 0 non-initial state with transitions or with nonignorable result actions
674          * -1 final state with only ignorable actions
675          *
676          * Bits 5..3
677          * The lowest byte value with non-ignorable actions is
678          * value<<5 (rounded down).
679          *
680          * Bits 2..0:
681          * The highest byte value with non-ignorable actions is
682          * (value<<5)&0x1f (rounded up).
683          */
684         byte stateProps[] = new byte[MBCS_MAX_STATE_COUNT];
685         int state;
686 
687         /* recurse from state 0 and set all stateProps */
688         getStateProp(mbcsTable.stateTable, stateProps, 0);
689 
690         for (state = 0; state < mbcsTable.countStates; ++state) {
691             if (stateProps[state] >= 0x40) {
692                 /* start from each direct state */
693                 enumToU(mbcsTable, stateProps, state, 0, 0);
694             }
695         }
696 
697 
698     }
699 
enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value)700     private static boolean enumToU(UConverterMBCSTable mbcsTable, byte stateProps[], int state, int offset, int value) {
701         int[] codePoints = new int[32];
702         int[] row;
703         char[] unicodeCodeUnits;
704         int anyCodePoints;
705         int b, limit;
706 
707         row = mbcsTable.stateTable[state];
708         unicodeCodeUnits = mbcsTable.unicodeCodeUnits;
709 
710         value<<=8;
711         anyCodePoints = -1; /* becomes non-negative if there is a mapping */
712 
713         b = (stateProps[state]&0x38)<<2;
714         if (b == 0 && stateProps[state] >= 0x40) {
715             /* skip byte sequences with leading zeros because they are note stored in the fromUnicode table */
716             codePoints[0] = UConverterConstants.U_SENTINEL;
717             b = 1;
718         }
719         limit = ((stateProps[state]&7)+1)<<5;
720         while (b < limit) {
721             int entry = row[b];
722             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
723                 int nextState = MBCS_ENTRY_TRANSITION_STATE(entry);
724                 if (stateProps[nextState] >= 0) {
725                     /* recurse to a state with non-ignorable actions */
726                     if (!enumToU(mbcsTable, stateProps, nextState, offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), value|b)) {
727                         return false;
728                     }
729                 }
730                 codePoints[b&0x1f] = UConverterConstants.U_SENTINEL;
731             } else {
732                 int c;
733                 int action;
734 
735                 /*
736                  * An if-else-if chain provides more reliable performance for
737                  * the most common cases compared to a switch.
738                  */
739                 action = MBCS_ENTRY_FINAL_ACTION(entry);
740                 if (action == MBCS_STATE_VALID_DIRECT_16) {
741                     /* output BMP code point */
742                     c = MBCS_ENTRY_FINAL_VALUE_16(entry);
743                 } else if (action == MBCS_STATE_VALID_16) {
744                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
745                     c = unicodeCodeUnits[finalOffset];
746                     if (c < 0xfffe) {
747                         /* output BMP code point */
748                     } else {
749                         c = UConverterConstants.U_SENTINEL;
750                     }
751                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
752                     int finalOffset = offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
753                     c = unicodeCodeUnits[finalOffset++];
754                     if (c < 0xd800) {
755                         /* output BMP code point below 0xd800 */
756                     } else if (c <= 0xdbff) {
757                         /* output roundtrip or fallback supplementary code point */
758                         c = ((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
759                     } else if (c == 0xe000) {
760                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
761                         c = unicodeCodeUnits[finalOffset];
762                     } else {
763                         c = UConverterConstants.U_SENTINEL;
764                     }
765                 } else if (action == MBCS_STATE_VALID_DIRECT_20) {
766                     /* output supplementary code point */
767                     c = MBCS_ENTRY_FINAL_VALUE(entry)+0x10000;
768                 } else {
769                     c = UConverterConstants.U_SENTINEL;
770                 }
771 
772                 codePoints[b&0x1f] = c;
773                 anyCodePoints&=c;
774             }
775             if (((++b)&0x1f) == 0) {
776                 if(anyCodePoints>=0) {
777                     if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) {
778                         return false;
779                     }
780                     anyCodePoints=-1;
781                 }
782             }
783         }
784 
785         return true;
786     }
787 
788     /*
789      * Only called if stateProps[state]==-1.
790      * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
791      * MBCS_STATE_CHANGE_ONLY.
792      */
getStateProp(int stateTable[][], byte stateProps[], int state)793     private static byte getStateProp(int stateTable[][], byte stateProps[], int state) {
794         int[] row;
795         int min, max, entry, nextState;
796 
797         row = stateTable[state];
798         stateProps[state] = 0;
799 
800         /* find first non-ignorable state */
801         for (min = 0;;++min) {
802             entry = row[min];
803             nextState = MBCS_ENTRY_STATE(entry);
804             if (stateProps[nextState] == -1) {
805                 getStateProp(stateTable, stateProps, nextState);
806             }
807             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
808                 if (stateProps[nextState] >- 0) {
809                     break;
810                 }
811             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
812                 break;
813             }
814             if (min == 0xff) {
815                 stateProps[state] = -0x40;  /* (byte)0xc0 */
816                 return stateProps[state];
817             }
818         }
819         stateProps[state]|=(byte)((min>>5)<<3);
820 
821         /* find last non-ignorable state */
822         for (max = 0xff; min < max; --max) {
823             entry = row[max];
824             nextState = MBCS_ENTRY_STATE(entry);
825             if (stateProps[nextState] == -1) {
826                 getStateProp(stateTable, stateProps, nextState);
827             }
828             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
829                 if (stateProps[nextState] >- 0) {
830                     break;
831                 }
832             } else if (MBCS_ENTRY_FINAL_ACTION(entry) < MBCS_STATE_UNASSIGNED) {
833                 break;
834             }
835         }
836         stateProps[state]|=(byte)(max>>5);
837 
838         /* recurse further and collect direct-state information */
839         while (min <= max) {
840             entry = row[min];
841             nextState = MBCS_ENTRY_STATE(entry);
842             if (stateProps[nextState] == -1) {
843                 getStateProp(stateTable, stateProps, nextState);
844             }
845             if (MBCS_ENTRY_IS_TRANSITION(entry)) {
846                 stateProps[nextState]|=0x40;
847                 if (MBCS_ENTRY_FINAL_ACTION(entry) <= MBCS_STATE_FALLBACK_DIRECT_20) {
848                     stateProps[state]|=0x40;
849                 }
850             }
851             ++min;
852         }
853         return stateProps[state];
854     }
855 
initializeConverter(int myOptions)856     protected void initializeConverter(int myOptions) {
857         UConverterMBCSTable mbcsTable;
858         ByteBuffer extIndexes;
859         short outputType;
860         byte maxBytesPerUChar;
861 
862         mbcsTable = sharedData.mbcs;
863         outputType = mbcsTable.outputType;
864 
865         if (outputType == MBCS_OUTPUT_DBCS_ONLY) {
866             /* the swaplfnl option does not apply, remove it */
867             this.options = myOptions &= ~UConverterConstants.OPTION_SWAP_LFNL;
868         }
869 
870         if ((myOptions & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
871             /* do this because double-checked locking is broken */
872             boolean isCached;
873 
874             // agljport:todo umtx_lock(NULL);
875             isCached = mbcsTable.swapLFNLStateTable != null;
876             // agljport:todo umtx_unlock(NULL);
877 
878             if (!isCached) {
879                 try {
880                     if (!EBCDICSwapLFNL()) {
881                         /* this option does not apply, remove it */
882                         this.options = myOptions & ~UConverterConstants.OPTION_SWAP_LFNL;
883                     }
884                 } catch (Exception e) {
885                     /* something went wrong. */
886                     return;
887                 }
888             }
889         }
890 
891         String lowerCaseName = icuCanonicalName.toLowerCase(Locale.ENGLISH);
892         if (lowerCaseName.indexOf("gb18030") >= 0) {
893             /* set a flag for GB 18030 mode, which changes the callback behavior */
894             this.options |= MBCS_OPTION_GB18030;
895         } else if (lowerCaseName.indexOf("keis") >= 0) {
896             this.options |= MBCS_OPTION_KEIS;
897         } else if (lowerCaseName.indexOf("jef") >= 0) {
898             this.options |= MBCS_OPTION_JEF;
899         } else if (lowerCaseName.indexOf("jips") >= 0) {
900             this.options |= MBCS_OPTION_JIPS;
901         }
902 
903         /* fix maxBytesPerUChar depending on outputType and options etc. */
904         if (outputType == MBCS_OUTPUT_2_SISO) {
905             /* changed from 3 to 4 in ICU4J only. #9205 */
906             maxBytesPerChar = 4; /* SO+DBCS+SI*/
907         }
908 
909         extIndexes = mbcsTable.extIndexes;
910         if (extIndexes != null) {
911             maxBytesPerUChar = (byte) GET_MAX_BYTES_PER_UCHAR(extIndexes);
912             if (outputType == MBCS_OUTPUT_2_SISO) {
913                 ++maxBytesPerUChar; /* SO + multiple DBCS */
914             }
915 
916             if (maxBytesPerUChar > maxBytesPerChar) {
917                 maxBytesPerChar = maxBytesPerUChar;
918             }
919         }
920     }
921      /* EBCDIC swap LF<->NL--------------------------------------------------------------------------------*/
922      /*
923       * This code modifies a standard EBCDIC<->Unicode mapping table for
924       * OS/390 (z/OS) Unix System Services (Open Edition).
925       * The difference is in the mapping of Line Feed and New Line control codes:
926       * Standard EBCDIC maps
927       *
928       * <U000A> \x25 |0
929       * <U0085> \x15 |0
930       *
931       * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
932       * mapping
933       *
934       * <U000A> \x15 |0
935       * <U0085> \x25 |0
936       *
937       * This code modifies a loaded standard EBCDIC<->Unicode mapping table
938       * by copying it into allocated memory and swapping the LF and NL values.
939       * This makes it possible to support the same EBCDIC charset in both versions without
940       * duplicating the entire installed table.
941       */
942     /* standard EBCDIC codes */
943     private static final short EBCDIC_LF = 0x0025;
944     private static final short EBCDIC_NL = 0x0015;
945 
946     /* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
947     private static final short EBCDIC_RT_LF = 0x0f25;
948     private static final short EBCDIC_RT_NL = 0x0f15;
949 
950     /* Unicode code points */
951     private static final short U_LF = 0x000A;
952     private static final short U_NL = 0x0085;
953 
EBCDICSwapLFNL()954     private boolean EBCDICSwapLFNL() throws Exception {
955         UConverterMBCSTable mbcsTable;
956 
957         char[] table;
958 
959         int[][] newStateTable;
960         String newName;
961 
962         int stage2Entry;
963 
964         mbcsTable = sharedData.mbcs;
965 
966         table = mbcsTable.fromUnicodeTable;
967         int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
968         char[] chars = mbcsTable.fromUnicodeChars;
969         char[] results = chars;
970 
971         /*
972          * Check that this is an EBCDIC table with SBCS portion -
973          * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
974          *
975          * If not, ignore the option. Options are always ignored if they do not apply.
976          */
977         if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
978               mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
979               mbcsTable.stateTable[0][EBCDIC_NL] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL))) {
980             return false;
981         }
982 
983         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
984             if (!(EBCDIC_RT_LF == MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
985                   EBCDIC_RT_NL == MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL))) {
986                 return false;
987             }
988         } else /* MBCS_OUTPUT_2_SISO */ {
989             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
990             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
991                   EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) {
992                 return false;
993             }
994 
995             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
996             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
997                   EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) {
998                 return false;
999             }
1000         }
1001 
1002         if (mbcsTable.fromUBytesLength > 0) {
1003             /*
1004              * We _know_ the number of bytes in the fromUnicodeBytes array
1005              * starting with header.version 4.1.
1006              */
1007             // sizeofFromUBytes = mbcsTable.fromUBytesLength;
1008         } else {
1009             /*
1010              * Otherwise:
1011              * There used to be code to enumerate the fromUnicode
1012              * trie and find the highest entry, but it was removed in ICU 3.2
1013              * because it was not tested and caused a low code coverage number.
1014              */
1015             throw new Exception("U_INVALID_FORMAT_ERROR");
1016         }
1017 
1018         /*
1019          * The table has an appropriate format.
1020          * Allocate and build
1021          * - a modified to-Unicode state table
1022          * - a modified from-Unicode output array
1023          * - a converter name string with the swap option appended
1024          */
1025 //        size = mbcsTable.countStates * 1024 + sizeofFromUBytes + UConverterConstants.MAX_CONVERTER_NAME_LENGTH + 20;
1026 
1027         /* copy and modify the to-Unicode state table */
1028         newStateTable = new int[mbcsTable.stateTable.length][mbcsTable.stateTable[0].length];
1029         for (int i = 0; i < newStateTable.length; i++) {
1030             System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, newStateTable[i].length);
1031         }
1032 
1033         newStateTable[0][EBCDIC_LF] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
1034         newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
1035 
1036         /* copy and modify the from-Unicode result table */
1037         char[] newResults = new char[chars.length];
1038         System.arraycopy(chars, 0, newResults, 0, chars.length);
1039         /* conveniently, the table access macros work on the left side of expressions */
1040         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
1041             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
1042             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
1043         } else /* MBCS_OUTPUT_2_SISO */ {
1044             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
1045             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
1046 
1047             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
1048             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
1049         }
1050 
1051         /* set the canonical converter name */
1052         newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
1053 
1054         if (mbcsTable.swapLFNLStateTable == null) {
1055             mbcsTable.swapLFNLStateTable = newStateTable;
1056             mbcsTable.swapLFNLFromUnicodeChars = newResults;
1057             mbcsTable.swapLFNLName = newName;
1058         }
1059         return true;
1060     }
1061 
1062     /**
1063      * MBCS output types for conversions from Unicode. These per-converter types determine the storage method in stage 3
1064      * of the lookup table, mostly how many bytes are stored per entry.
1065      */
1066     static final int MBCS_OUTPUT_1 = 0; /* 0 */
1067     static final int MBCS_OUTPUT_2 = MBCS_OUTPUT_1 + 1; /* 1 */
1068     static final int MBCS_OUTPUT_3 = MBCS_OUTPUT_2 + 1; /* 2 */
1069     static final int MBCS_OUTPUT_4 = MBCS_OUTPUT_3 + 1; /* 3 */
1070     static final int MBCS_OUTPUT_3_EUC = 8; /* 8 */
1071     static final int MBCS_OUTPUT_4_EUC = MBCS_OUTPUT_3_EUC + 1; /* 9 */
1072     static final int MBCS_OUTPUT_2_SISO = 12; /* c */
1073     static final int MBCS_OUTPUT_2_HZ = MBCS_OUTPUT_2_SISO + 1; /* d */
1074     static final int MBCS_OUTPUT_EXT_ONLY = MBCS_OUTPUT_2_HZ + 1; /* e */
1075     // static final int MBCS_OUTPUT_COUNT = MBCS_OUTPUT_EXT_ONLY + 1;
1076     static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */
1077 
1078     /* GB 18030 data ------------------------------------------------------------ */
1079 
1080     /* helper macros for linear values for GB 18030 four-byte sequences */
LINEAR_18030(int a, int b, int c, int d)1081     private static int LINEAR_18030(int a, int b, int c, int d) {
1082         return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff));
1083     }
1084 
1085     private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
1086 
LINEAR(int x)1087     private static int LINEAR(int x) {
1088         return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
1089     }
1090 
1091     /*
1092      * Some ranges of GB 18030 where both the Unicode code points and the GB four-byte sequences are contiguous and are
1093      * handled algorithmically by the special callback functions below. The values are start & end of Unicode & GB
1094      * codes.
1095      *
1096      * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
1097      */
    /*
     * Each row holds four values: the first and last Unicode code point of a range,
     * followed by LINEAR() of the first and last GB 18030 four-byte sequence of that range.
     */
    private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] {
            { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) },
            { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) },
            { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) },
            { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) },
            { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) },
            { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) },
            { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) },
            { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) },
            { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) },
            { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) },
            { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) },
            { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) },
            { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) },
            { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } };
1113 
1114     /* bit flag for UConverter.options indicating GB 18030 special handling */
1115     private static final int MBCS_OPTION_GB18030 = 0x8000;
1116 
1117     /* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
1118     private static final int MBCS_OPTION_KEIS = 0x01000;
1119     private static final int MBCS_OPTION_JEF = 0x02000;
1120     private static final int MBCS_OPTION_JIPS = 0x04000;
1121 
1122     private static enum SISO_Option {
1123         SI,
1124         SO
1125     }
1126 
1127     private static final byte[] KEIS_SO_CHAR = { 0x0A, 0x42 };
1128     private static final byte[] KEIS_SI_CHAR = { 0x0A, 0x41 };
1129     private static final byte JEF_SO_CHAR = 0x28;
1130     private static final byte JEF_SI_CHAR = 0x29;
1131     private static final byte[] JIPS_SO_CHAR = { 0x1A, 0x70 };
1132     private static final byte[] JIPS_SI_CHAR = { 0x1A, 0x71 };
1133 
getSISOBytes(SISO_Option option, int cnvOption, byte[] value)1134     private static int getSISOBytes(SISO_Option option, int cnvOption, byte[] value) {
1135         int SISOLength = 0;
1136 
1137         switch (option) {
1138             case SI:
1139                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
1140                     value[0] = KEIS_SI_CHAR[0];
1141                     value[1] = KEIS_SI_CHAR[1];
1142                     SISOLength = 2;
1143                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
1144                     value[0] = JEF_SI_CHAR;
1145                     SISOLength = 1;
1146                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
1147                     value[0] = JIPS_SI_CHAR[0];
1148                     value[1] = JIPS_SI_CHAR[1];
1149                     SISOLength = 2;
1150                 } else {
1151                     value[0] = UConverterConstants.SI;
1152                     SISOLength = 1;
1153                 }
1154                 break;
1155             case SO:
1156                 if ((cnvOption&MBCS_OPTION_KEIS)!=0) {
1157                     value[0] = KEIS_SO_CHAR[0];
1158                     value[1] = KEIS_SO_CHAR[1];
1159                     SISOLength = 2;
1160                 } else if ((cnvOption&MBCS_OPTION_JEF)!=0) {
1161                     value[0] = JEF_SO_CHAR;
1162                     SISOLength = 1;
1163                 } else if ((cnvOption&MBCS_OPTION_JIPS)!=0) {
1164                     value[0] = JIPS_SO_CHAR[0];
1165                     value[1] = JIPS_SO_CHAR[1];
1166                     SISOLength = 2;
1167                 } else {
1168                     value[0] = UConverterConstants.SO;
1169                     SISOLength = 1;
1170                 }
1171                 break;
1172             default:
1173                 /* Should never happen. */
1174                 break;
1175         }
1176 
1177         return SISOLength;
1178     }
1179     // enum {
1180         static final int MBCS_MAX_STATE_COUNT = 128;
1181     // };
1182     /**
1183      * MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
1184      */
1185     static final int MBCS_STATE_VALID_DIRECT_16 = 0;
1186     static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
1187     static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
1188     static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
1189     static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
1190     static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
1191     static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
1192     static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
1193     static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
1194 
MBCS_ENTRY_SET_STATE(int entry, int state)1195     static int MBCS_ENTRY_SET_STATE(int entry, int state) {
1196         return (entry&0x80ffffff)|(state<<24L);
1197     }
1198 
MBCS_ENTRY_STATE(int entry)1199     static int MBCS_ENTRY_STATE(int entry) {
1200         return (((entry)>>24)&0x7f);
1201     }
1202 
1203     /* Methods for state table entries */
MBCS_ENTRY_TRANSITION(int state, int offset)1204     static int MBCS_ENTRY_TRANSITION(int state, int offset) {
1205         return (state << 24L) | offset;
1206     }
1207 
MBCS_ENTRY_FINAL(int state, int action, int value)1208     static int MBCS_ENTRY_FINAL(int state, int action, int value) {
1209         return 0x80000000 | (state << 24L) | (action << 20L) | value;
1210     }
1211 
MBCS_ENTRY_IS_TRANSITION(int entry)1212     static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
1213         return (entry) >= 0;
1214     }
1215 
MBCS_ENTRY_IS_FINAL(int entry)1216     static boolean MBCS_ENTRY_IS_FINAL(int entry) {
1217         return (entry) < 0;
1218     }
1219 
MBCS_ENTRY_TRANSITION_STATE(int entry)1220     static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
1221         return ((entry) >>> 24);
1222     }
1223 
MBCS_ENTRY_TRANSITION_OFFSET(int entry)1224     static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
1225         return ((entry) & 0xffffff);
1226     }
1227 
MBCS_ENTRY_FINAL_STATE(int entry)1228     static int MBCS_ENTRY_FINAL_STATE(int entry) {
1229         return ((entry) >>> 24) & 0x7f;
1230     }
1231 
MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry)1232     static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
1233         return ((entry) < 0x80100000);
1234     }
1235 
MBCS_ENTRY_FINAL_ACTION(int entry)1236     static int MBCS_ENTRY_FINAL_ACTION(int entry) {
1237         return ((entry) >>> 20) & 0xf;
1238     }
1239 
MBCS_ENTRY_FINAL_VALUE(int entry)1240     static int MBCS_ENTRY_FINAL_VALUE(int entry) {
1241         return ((entry) & 0xfffff);
1242     }
1243 
MBCS_ENTRY_FINAL_VALUE_16(int entry)1244     static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
1245         return (char) (entry);
1246     }
1247 
MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips)1248     static boolean MBCS_IS_ASCII_ROUNDTRIP(int b, long asciiRoundtrips) {
1249         return (((asciiRoundtrips) & (1<<((b)>>2)))!=0);
1250     }
1251 
1252     /**
1253      * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. It works for single-byte,
1254      * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
1255      */
MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b)1256     static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
1257         assert 0 <= b && b <= 0xff;
1258         return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]);
1259     }
1260 
1261     /* single-byte fromUnicode: get the 16-bit result word */
MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c)1262     static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) {
1263         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
1264         int i = table[i1] + (c & 0xf);
1265         return results[i];
1266     }
1267 
1268     /* single-byte fromUnicode: set the 16-bit result word with newValue*/
MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue)1269     static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) {
1270         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
1271         int i = table[i1] + (c & 0xf);
1272         results[i] = (char) newValue;
1273     }
1274 
1275     /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c)1276     static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) {
1277         int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f);
1278         return tableInts[i];
1279     }
1280 
MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c)1281     private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
1282         return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
1283     }
1284 
MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c)1285     static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) {
1286         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
1287         return chars[i];
1288     }
1289 
MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue)1290     static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) {
1291         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
1292         chars[i] = (char) newValue;
1293     }
1294 
MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c)1295     private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) {
1296         int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
1297         return ints[i];
1298     }
1299 
MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c)1300     static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
1301         return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
1302     }
1303 
1304     // ------------UConverterExt-------------------------------------------------------
1305 
1306     static final int EXT_INDEXES_LENGTH = 0; /* 0 */
1307 
1308     static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
1309     static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
1310     static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
1311     static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
1312 
1313     static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
1314     static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
1315     static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
1316     static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
1317     static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
1318 
1319     static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
1320     static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
1321     static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
1322     static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
1323     static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
1324     static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
1325     static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
1326 
1327     private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
1328     // private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
1329     // private static final int EXT_FLAGS = EXT_COUNT_UCHARS + 1;
1330     //
1331     // private static final int EXT_RESERVED_INDEX = EXT_FLAGS + 1; /* 20, moves with additional indexes */
1332     //
1333     // private static final int EXT_SIZE=31;
1334     // private static final int EXT_INDEXES_MIN_LENGTH=32;
1335 
1336     static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
1337 
1338     /* toUnicode helpers -------------------------------------------------------- */
1339 
1340     private static final int TO_U_BYTE_SHIFT = 24;
1341     private static final int TO_U_VALUE_MASK = 0xffffff;
1342     private static final int TO_U_MIN_CODE_POINT = 0x1f0000;
1343     private static final int TO_U_MAX_CODE_POINT = 0x2fffff;
1344     private static final int TO_U_ROUNDTRIP_FLAG = (1 << 23);
1345     private static final int TO_U_INDEX_MASK = 0x3ffff;
1346     private static final int TO_U_LENGTH_SHIFT = 18;
1347     private static final int TO_U_LENGTH_OFFSET = 12;
1348 
1349     /* maximum number of indexed UChars */
1350     static final int MAX_UCHARS = 19;
1351 
TO_U_GET_BYTE(int word)1352     static int TO_U_GET_BYTE(int word) {
1353         return word >>> TO_U_BYTE_SHIFT;
1354     }
1355 
TO_U_GET_VALUE(int word)1356     static int TO_U_GET_VALUE(int word) {
1357         return word & TO_U_VALUE_MASK;
1358     }
1359 
TO_U_IS_ROUNDTRIP(int value)1360     static boolean TO_U_IS_ROUNDTRIP(int value) {
1361         return (value & TO_U_ROUNDTRIP_FLAG) != 0;
1362     }
1363 
TO_U_IS_PARTIAL(int value)1364     static boolean TO_U_IS_PARTIAL(int value) {
1365         return 0 <= value && value < TO_U_MIN_CODE_POINT;
1366     }
1367 
TO_U_GET_PARTIAL_INDEX(int value)1368     static int TO_U_GET_PARTIAL_INDEX(int value) {
1369         return value;
1370     }
1371 
TO_U_MASK_ROUNDTRIP(int value)1372     static int TO_U_MASK_ROUNDTRIP(int value) {
1373         return value & ~TO_U_ROUNDTRIP_FLAG;
1374     }
1375 
TO_U_MAKE_WORD(byte b, int value)1376     private static int TO_U_MAKE_WORD(byte b, int value) {
1377         // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int.
1378         return (b << TO_U_BYTE_SHIFT) | value;
1379     }
1380 
1381     /* use after masking off the roundtrip flag */
TO_U_IS_CODE_POINT(int value)1382     static boolean TO_U_IS_CODE_POINT(int value) {
1383         assert value >= 0;
1384         return value <= TO_U_MAX_CODE_POINT;
1385     }
1386 
TO_U_GET_CODE_POINT(int value)1387     static int TO_U_GET_CODE_POINT(int value) {
1388         assert value >= 0;
1389         return value - TO_U_MIN_CODE_POINT;
1390     }
1391 
TO_U_GET_INDEX(int value)1392     private static int TO_U_GET_INDEX(int value) {
1393         return value & TO_U_INDEX_MASK;
1394     }
1395 
TO_U_GET_LENGTH(int value)1396     private static int TO_U_GET_LENGTH(int value) {
1397         return (value >>> TO_U_LENGTH_SHIFT) - TO_U_LENGTH_OFFSET;
1398     }
1399 
1400     /* fromUnicode helpers ------------------------------------------------------ */
1401 
1402     /* most trie constants are shared with ucnvmbcs.h */
1403     private static final int STAGE_2_LEFT_SHIFT = 2;
1404 
1405     // private static final int STAGE_3_GRANULARITY = 4;
1406 
1407     /* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c)1408     static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
1409         return stage3.get((stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)
1410                 + (c & 0xf));
1411     }
1412 
1413     private static final int FROM_U_LENGTH_SHIFT = 24;
1414     private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
1415     static final int FROM_U_RESERVED_MASK = 0x60000000;
1416     private static final int FROM_U_DATA_MASK = 0xffffff;
1417 
1418     /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
1419     static final int FROM_U_SUBCHAR1 = 0x80000001;
1420 
1421     /* at most 3 bytes in the lower part of the value */
1422     private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
1423 
1424     /* maximum number of indexed bytes */
1425     static final int MAX_BYTES = 0x1f;
1426 
FROM_U_IS_PARTIAL(int value)1427     static boolean FROM_U_IS_PARTIAL(int value) {
1428         return (value >>> FROM_U_LENGTH_SHIFT) == 0;
1429     }
1430 
FROM_U_GET_PARTIAL_INDEX(int value)1431     static int FROM_U_GET_PARTIAL_INDEX(int value) {
1432         return value;
1433     }
1434 
FROM_U_IS_ROUNDTRIP(int value)1435     static boolean FROM_U_IS_ROUNDTRIP(int value) {
1436         return (value & FROM_U_ROUNDTRIP_FLAG) != 0;
1437     }
1438 
FROM_U_MASK_ROUNDTRIP(int value)1439     private static int FROM_U_MASK_ROUNDTRIP(int value) {
1440         return value & ~FROM_U_ROUNDTRIP_FLAG;
1441     }
1442 
1443     /* use after masking off the roundtrip flag */
FROM_U_GET_LENGTH(int value)1444     static int FROM_U_GET_LENGTH(int value) {
1445         return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
1446     }
1447 
1448     /* get bytes or bytes index */
FROM_U_GET_DATA(int value)1449     static int FROM_U_GET_DATA(int value) {
1450         return value & FROM_U_DATA_MASK;
1451     }
1452 
1453     /* get the pointer to an extension array from indexes[index] */
ARRAY(ByteBuffer indexes, int index, Class<?> itemType)1454     static Buffer ARRAY(ByteBuffer indexes, int index, Class<?> itemType) {
1455         int oldpos = indexes.position();
1456         Buffer b;
1457 
1458         // TODO: It is very inefficient to create Buffer objects for each array access.
1459         // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions)
1460         // which has buffers for the arrays, together with the code that works with them.
1461         indexes.position(indexes.getInt(index << 2));
1462         if (itemType == int.class)
1463             b = indexes.asIntBuffer();
1464         else if (itemType == char.class)
1465             b = indexes.asCharBuffer();
1466         else if (itemType == short.class)
1467             b = indexes.asShortBuffer();
1468         else
1469             // default or (itemType == byte.class)
1470             b = indexes.slice();
1471         indexes.position(oldpos);
1472         return b;
1473     }
1474 
GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes)1475     private static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes) {
1476         indexes.position(0);
1477         return indexes.getInt(EXT_COUNT_BYTES) & 0xff;
1478     }
1479 
1480     /*
1481      * @return index of the UChar, if found; else <0
1482      */
findFromU(CharBuffer fromUSection, int length, char u)1483     static int findFromU(CharBuffer fromUSection, int length, char u) {
1484         int i, start, limit;
1485 
1486         /* binary search */
1487         start = 0;
1488         limit = length;
1489         for (;;) {
1490             i = limit - start;
1491             if (i <= 1) {
1492                 break; /* done */
1493             }
1494             /* start<limit-1 */
1495 
1496             if (i <= 4) {
1497                 /* linear search for the last part */
1498                 if (u <= fromUSection.get(fromUSection.position() + start)) {
1499                     break;
1500                 }
1501                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
1502                     break;
1503                 }
1504                 if (++start < limit && u <= fromUSection.get(fromUSection.position() + start)) {
1505                     break;
1506                 }
1507                 /* always break at start==limit-1 */
1508                 ++start;
1509                 break;
1510             }
1511 
1512             i = (start + limit) / 2;
1513             if (u < fromUSection.get(fromUSection.position() + i)) {
1514                 limit = i;
1515             } else {
1516                 start = i;
1517             }
1518         }
1519 
1520         /* did we really find it? */
1521         if (start < limit && u == fromUSection.get(fromUSection.position() + start)) {
1522             return start;
1523         } else {
1524             return -1; /* not found */
1525         }
1526     }
1527 
1528     /*
1529      * @return lookup value for the byte, if found; else 0
1530      */
findToU(IntBuffer toUSection, int length, short byt)1531     static int findToU(IntBuffer toUSection, int length, short byt) {
1532         long word0, word;
1533         int i, start, limit;
1534 
1535         /* check the input byte against the lowest and highest section bytes */
1536         // agljport:comment instead of receiving a start position parameter for toUSection we'll rely on its position
1537         // property
1538         start = TO_U_GET_BYTE(toUSection.get(toUSection.position()));
1539         limit = TO_U_GET_BYTE(toUSection.get(toUSection.position() + length - 1));
1540         if (byt < start || limit < byt) {
1541             return 0; /* the byte is out of range */
1542         }
1543 
1544         if (length == ((limit - start) + 1)) {
1545             /* direct access on a linear array */
1546             return TO_U_GET_VALUE(toUSection.get(toUSection.position() + byt - start)); /* could be 0 */
1547         }
1548 
1549         /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
1550         word0 = TO_U_MAKE_WORD((byte) byt, 0) & UConverterConstants.UNSIGNED_INT_MASK;
1551 
1552         /*
1553          * Shift byte once instead of each section word and add 0xffffff. We will compare the shifted/added byte
1554          * (bbffffff) against section words which have byte values in the same bit position. If and only if byte bb <
1555          * section byte ss then bbffffff<ssvvvvvv for all v=0..f so we need not mask off the lower 24 bits of each
1556          * section word.
1557          */
1558         word = word0 | TO_U_VALUE_MASK;
1559 
1560         /* binary search */
1561         start = 0;
1562         limit = length;
1563         for (;;) {
1564             i = limit - start;
1565             if (i <= 1) {
1566                 break; /* done */
1567             }
1568             /* start<limit-1 */
1569 
1570             if (i <= 4) {
1571                 /* linear search for the last part */
1572                 if (word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
1573                     break;
1574                 }
1575                 if (++start < limit
1576                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
1577                     break;
1578                 }
1579                 if (++start < limit
1580                         && word0 <= (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK)) {
1581                     break;
1582                 }
1583                 /* always break at start==limit-1 */
1584                 ++start;
1585                 break;
1586             }
1587 
1588             i = (start + limit) / 2;
1589             if (word < (toUSection.get(toUSection.position() + i) & UConverterConstants.UNSIGNED_INT_MASK)) {
1590                 limit = i;
1591             } else {
1592                 start = i;
1593             }
1594         }
1595 
1596         /* did we really find it? */
1597         if (start < limit) {
1598             word = (toUSection.get(toUSection.position() + start) & UConverterConstants.UNSIGNED_INT_MASK);
1599             if (byt == TO_U_GET_BYTE((int)word)) {
1600                 return TO_U_GET_VALUE((int) word); /* never 0 */
1601             }
1602         }
1603         return 0; /* not found */
1604     }
1605 
1606     /*
1607      * TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state
1608      */
TO_U_VERIFY_SISO_MATCH(byte sisoState, int match)1609     static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
1610         return sisoState < 0 || (sisoState == 0) == (match == 1);
1611     }
1612 
1613     /*
1614      * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), or 1 for DBCS-only, or -1 if the converter is not
1615      * SI/SO stateful
1616      *
1617      * Note: For SI/SO stateful converters getting here, cnv->mode==0 is equivalent to firstLength==1.
1618      */
SISO_STATE(UConverterSharedData sharedData, int mode)1619     private static int SISO_STATE(UConverterSharedData sharedData, int mode) {
1620         return sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO ? (byte) mode
1621                 : sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
1622     }
1623 
1624     class CharsetDecoderMBCS extends CharsetDecoderICU {
1625 
        /** Creates a decoder for the given charset; the argument is passed straight to the superclass constructor. */
        CharsetDecoderMBCS(CharsetICU cs) {
            super(cs);
        }
1629 
1630         @Override
decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)1631         protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
1632         /* Just call cnvMBCSToUnicodeWithOffsets() to remove duplicate code. */
1633             return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
1634         }
1635 
1636         /*
1637          * continue partial match with new input never called for simple, single-character conversion
1638          */
continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex, boolean flush)1639         private CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex,
1640                 boolean flush) {
1641             CoderResult cr = CoderResult.UNDERFLOW;
1642 
1643             int[] value = new int[1];
1644             int match, length;
1645 
1646             match = matchToU((byte) SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source,
1647                     value, isToUUseFallback(), flush);
1648 
1649             if (match > 0) {
1650                 if (match >= preToULength) {
1651                     /* advance src pointer for the consumed input */
1652                     source.position(source.position() + match - preToULength);
1653                     preToULength = 0;
1654                 } else {
1655                     /* the match did not use all of preToU[] - keep the rest for replay */
1656                     length = preToULength - match;
1657                     System.arraycopy(preToUArray, preToUBegin + match, preToUArray, preToUBegin, length);
1658                     preToULength = (byte) -length;
1659                 }
1660 
1661                 /* write result */
1662                 cr = writeToU(value[0], target, offsets, srcIndex);
1663             } else if (match < 0) {
1664                 /* save state for partial match */
1665                 int j, sArrayIndex;
1666 
1667                 /* just _append_ the newly consumed input to preToU[] */
1668                 sArrayIndex = source.position();
1669                 match = -match;
1670                 for (j = preToULength; j < match; ++j) {
1671                     preToUArray[j] = source.get(sArrayIndex++);
1672                 }
1673                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
1674                 preToULength = (byte) match;
1675             } else /* match==0 */{
1676                 /*
1677                  * no match
1678                  *
1679                  * We need to split the previous input into two parts:
1680                  *
1681                  * 1. The first codepage character is unmappable - that's how we got into trying the extension data in
1682                  * the first place. We need to move it from the preToU buffer to the error buffer, set an error code,
1683                  * and prepare the rest of the previous input for 2.
1684                  *
1685                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
1686                  * character. At that time, we have to try again from scratch to convert these input characters. The
1687                  * replay will be handled by the ucnv.c conversion code.
1688                  */
1689 
1690                 /* move the first codepage character to the error field */
1691                 System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
1692                 toULength = preToUFirstLength;
1693 
1694                 /* move the rest up inside the buffer */
1695                 length = preToULength - preToUFirstLength;
1696                 if (length > 0) {
1697                     System.arraycopy(preToUArray, preToUBegin + preToUFirstLength, preToUArray, preToUBegin, length);
1698                 }
1699 
1700                 /* mark preToU for replay */
1701                 preToULength = (byte) -length;
1702 
1703                 /* set the error code for unassigned */
1704                 cr = CoderResult.unmappableForLength(preToUFirstLength);
1705             }
1706             return cr;
1707         }
1708 
        /*
         * Matches input bytes against the toUnicode extension table and reports the longest usable match.
         *
         * This works like matchFromU() except - the first character is in pre - no trie is used - the returned
         * matchLength is not offset by 2.
         *
         * Input is taken first from preArray[preArrayBegin .. preArrayBegin+preLength) and then from source,
         * starting at its current position. Source bytes are read with absolute gets, so this method does not
         * advance source; source may be null, in which case only the pre bytes are used.
         *
         * sisoState: 0 restricts the match to exactly one byte and forces flush; a negative value disables the
         * SI/SO length check (see TO_U_VERIFY_SISO_MATCH).
         *
         * Returns:
         *   >0  number of bytes of the longest match; pMatchValue[0] holds its value with the roundtrip flag
         *       masked off
         *    0  no extension data or no match
         *   <0  partial match with all input consumed and flush==false: the negated number of bytes examined
         *       so far (only when that number does not exceed MAX_BYTES)
         */
        private int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source,
                int[] pMatchValue, boolean isUseFallback, boolean flush) {
            ByteBuffer cx = sharedData.mbcs.extIndexes;
            IntBuffer toUTable, toUSection;

            int value, matchValue, srcLength = 0;
            int i, j, index, length, matchLength;
            short b;

            if (cx == null || cx.asIntBuffer().get(EXT_TO_U_LENGTH) <= 0) {
                return 0; /* no extension data, no match */
            }

            /* initialize */
            toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
            index = 0;

            /* i counts bytes taken from preArray, j counts bytes taken from source */
            matchValue = 0;
            i = j = matchLength = 0;
            if (source != null) {
                srcLength = source.remaining();
            }

            if (sisoState == 0) {
                /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
                if (preLength > 1) {
                    return 0; /* no match of a DBCS sequence in SBCS mode */
                } else if (preLength == 1) {
                    srcLength = 0;
                } else /* preLength==0 */{
                    if (srcLength > 1) {
                        srcLength = 1;
                    }
                }
                /* with at most one byte to look at, a partial match must not wait for more input */
                flush = true;
            }

            /* we must not remember fallback matches when not using fallbacks */

            /* match input units until there is a full match or the input is consumed */
            for (;;) {
                /* go to the next section: slice the table at index, then restore the table position */
                int oldpos = toUTable.position();
                toUSection = ((IntBuffer) toUTable.position(index)).slice();
                toUTable.position(oldpos);

                /* read first pair of the section; the relative get() leaves toUSection positioned after it */
                value = toUSection.get();
                length = TO_U_GET_BYTE(value);
                value = TO_U_GET_VALUE(value);
                if (value != 0 && (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback))
                        && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
                    /* remember longest match so far */
                    matchValue = value;
                    matchLength = i + j;
                }

                /* match pre[] then src[] */
                if (i < preLength) {
                    b = (short) (preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
                } else if (j < srcLength) {
                    b = (short) (source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
                } else {
                    /* all input consumed, partial match */
                    /* note: length is reassigned to i+j here only when flush is false */
                    if (flush || (length = (i + j)) > MAX_BYTES) {
                        /*
                         * end of the entire input stream, stop with the longest match so far or: partial match must not
                         * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
                         */
                        break;
                    } else {
                        /* continue with more input next time */
                        return -length;
                    }
                }

                /* search for the current UChar */
                value = findToU(toUSection, length, b);
                if (value == 0) {
                    /* no match here, stop with the longest match so far */
                    break;
                } else {
                    if (TO_U_IS_PARTIAL(value)) {
                        /* partial match, continue */
                        index = TO_U_GET_PARTIAL_INDEX(value);
                    } else {
                        if ((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback(isUseFallback)) && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
                            /* full match, stop with result */
                            matchValue = value;
                            matchLength = i + j;
                        } else {
                            /* full match on fallback not taken, stop with the longest match so far */
                        }
                        break;
                    }
                }
            }

            if (matchLength == 0) {
                /* no match at all */
                return 0;
            }

            /* return result */
            pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
            return matchLength;
        }
1820 
writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex)1821         private CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex) {
1822             ByteBuffer cx = sharedData.mbcs.extIndexes;
1823             /* output the result */
1824             if (TO_U_IS_CODE_POINT(value)) {
1825                 /* output a single code point */
1826                 return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);
1827             } else {
1828                 /* output a string - with correct data we have resultLength>0 */
1829 
1830                 char[] a = new char[TO_U_GET_LENGTH(value)];
1831                 CharBuffer cb = ((CharBuffer) ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));
1832                 cb.position(TO_U_GET_INDEX(value));
1833                 cb.get(a, 0, a.length);
1834                 return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);
1835             }
1836         }
1837 
toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex)1838         private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {
1839             CoderResult cr = CoderResult.UNDERFLOW;
1840             int tBeginIndex = target.position();
1841 
1842             if (target.hasRemaining()) {
1843                 if (c <= 0xffff) {
1844                     target.put((char) c);
1845                     c = UConverterConstants.U_SENTINEL;
1846                 } else /* c is a supplementary code point */{
1847                     target.put(UTF16.getLeadSurrogate(c));
1848                     c = UTF16.getTrailSurrogate(c);
1849                     if (target.hasRemaining()) {
1850                         target.put((char) c);
1851                         c = UConverterConstants.U_SENTINEL;
1852                     }
1853                 }
1854 
1855                 /* write offsets */
1856                 if (offsets != null) {
1857                     offsets.put(sourceIndex);
1858                     if ((tBeginIndex + 1) < target.position()) {
1859                         offsets.put(sourceIndex);
1860                     }
1861                 }
1862             }
1863 
1864             /* write overflow from c */
1865             if (c >= 0) {
1866                 charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
1867                 cr = CoderResult.OVERFLOW;
1868             }
1869 
1870             return cr;
1871         }
1872 
1873         /*
1874          * Input sequence: cnv->toUBytes[0..length[ @return if(U_FAILURE) return the length (toULength, byteIndex) for
1875          * the input else return 0 after output has been written to the target
1876          */
toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex, boolean flush, CoderResult[] cr)1877         private int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex,
1878                 boolean flush, CoderResult[] cr) {
1879             // ByteBuffer cx;
1880 
1881             if (sharedData.mbcs.extIndexes != null
1882                     && initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {
1883                 return 0; /* an extension mapping handled the input */
1884             }
1885 
1886             /* GB 18030 */
1887             if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
1888                 int[] range;
1889                 int linear;
1890                 int i;
1891 
1892                 linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
1893                 for (i = 0; i < gb18030Ranges.length; ++i) {
1894                     range = gb18030Ranges[i];
1895                     if (range[2] <= linear && linear <= range[3]) {
1896                         /* found the sequence, output the Unicode code point for it */
1897                         cr[0] = CoderResult.UNDERFLOW;
1898 
1899                         /* add the linear difference between the input and start sequences to the start code point */
1900                         linear = range[0] + (linear - range[2]);
1901 
1902                         /* output this code point */
1903                         cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex);
1904 
1905                         return 0;
1906                     }
1907                 }
1908             }
1909 
1910             /* no mapping */
1911             cr[0] = CoderResult.unmappableForLength(length);
1912             return length;
1913         }
1914 
1915         /*
1916          * target<targetLimit; set error code for overflow
1917          */
initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex, boolean flush, CoderResult[] cr)1918         private boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets,
1919                 int srcIndex, boolean flush, CoderResult[] cr) {
1920             int[] value = new int[1];
1921             int match = 0;
1922 
1923             /* try to match */
1924             match = matchToU((byte) SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source,
1925                     value, isToUUseFallback(), flush);
1926             if (match > 0) {
1927                 /* advance src pointer for the consumed input */
1928                 source.position(source.position() + match - firstLength);
1929 
1930                 /* write result to target */
1931                 cr[0] = writeToU(value[0], target, offsets, srcIndex);
1932                 return true;
1933             } else if (match < 0) {
1934                 /* save state for partial match */
1935                 byte[] sArray;
1936                 int sArrayIndex;
1937                 int j;
1938 
1939                 /* copy the first code point */
1940                 sArray = toUBytesArray;
1941                 sArrayIndex = toUBytesBegin;
1942                 preToUFirstLength = (byte) firstLength;
1943                 for (j = 0; j < firstLength; ++j) {
1944                     preToUArray[j] = sArray[sArrayIndex++];
1945                 }
1946 
1947                 /* now copy the newly consumed input */
1948                 sArrayIndex = source.position();
1949                 match = -match;
1950                 for (; j < match; ++j) {
1951                     preToUArray[j] = source.get(sArrayIndex++);
1952                 }
1953                 source.position(sArrayIndex);
1954                 preToULength = (byte) match;
1955                 return true;
1956             } else /* match==0 no match */{
1957                 return false;
1958             }
1959         }
1960 
simpleMatchToU(ByteBuffer source, boolean useFallback)1961         private int simpleMatchToU(ByteBuffer source, boolean useFallback) {
1962             int[] value = new int[1];
1963             int match;
1964 
1965             if (source.remaining() <= 0) {
1966                 return 0xffff;
1967             }
1968 
1969             /* try to match */
1970             byte[] sourceArray;
1971             int sourcePosition, sourceLimit;
1972             if (source.isReadOnly()) {
1973                 // source.array() would throw an exception
1974                 sourcePosition = source.position();  // relative to source.array()
1975                 sourceArray = new byte[Math.min(source.remaining(), EXT_MAX_BYTES)];
1976                 source.get(sourceArray).position(sourcePosition);
1977                 sourcePosition = 0;  // relative to sourceArray
1978                 sourceLimit = sourceArray.length;
1979             } else {
1980                 sourceArray = source.array();
1981                 sourcePosition = source.position();
1982                 sourceLimit = source.limit();
1983             }
1984             match = matchToU((byte) -1, sourceArray, sourcePosition, sourceLimit, null, value, useFallback, true);
1985 
1986             if (match == source.remaining()) {
1987                 /* write result for simple, single-character conversion */
1988                 if (TO_U_IS_CODE_POINT(value[0])) {
1989                     return TO_U_GET_CODE_POINT(value[0]);
1990                 }
1991             }
1992 
1993             /*
1994              * return no match because - match>0 && value points to string: simple conversion cannot handle multiple
1995              * code points - match>0 && match!=length: not all input consumed, forbidden for this function - match==0:
1996              * no match found in the first place - match<0: partial match, not supported for simple conversion (and
1997              * flush==TRUE)
1998              */
1999             return 0xfffe;
2000         }
2001 
/**
 * Converts bytes from source into UTF-16 code units in target by walking the MBCS state table, and, when
 * offsets is not null, records for each code unit written the source index of the character it came from.
 *
 * Steps, in order:
 * - If preToULength > 0, continueMatchToU() is called first; the method returns its result right away if
 *   that result is an error or if preToULength is negative afterwards.
 * - If sharedData.mbcs.countStates == 1, the work is delegated to one of the two single-byte variants
 *   (BMP-only when hasSupplementary() is false).
 * - Otherwise the state machine below runs, resuming from toUnicodeStatus, mode, toULength and
 *   toUBytesArray, and storing offset, state and byteIndex back into toUnicodeStatus, mode and toULength
 *   before returning. The position of source is set to the index reached.
 *
 * @param source  input bytes, read from its position up to its limit
 * @param target  receives the converted code units
 * @param offsets receives source indexes, or null if offsets are not wanted
 * @param flush   passed through to continueMatchToU() and toU()
 * @return UNDERFLOW if the loop ended without a problem, OVERFLOW if target filled up,
 *         malformedForLength(n) for an illegal sequence, or the result set by toU() for a sequence that
 *         the state table does not map
 */
cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)2002         CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
2003             CoderResult[] cr = { CoderResult.UNDERFLOW };
2004 
2005             int sourceArrayIndex, sourceArrayIndexStart;
2006             int stateTable[][/* 256 */];
2007             char[] unicodeCodeUnits;
2008 
2009             int offset;
2010             byte state;
2011             int byteIndex;
2012             byte[] bytes;
2013 
2014             int sourceIndex, nextSourceIndex;
2015 
2016             int entry = 0;
2017             char c;
2018             byte action;
2019 
2020             if (preToULength > 0) {
2021                 /*
2022                  * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
2023                  * continuous offsets
2024                  */
2025                 cr[0] = continueMatchToU(source, target, offsets, -1, flush);
2026 
2027                 if (cr[0].isError() || preToULength < 0) {
2028                     return cr[0];
2029                 }
2030             }
2031 
2032             if (sharedData.mbcs.countStates == 1) {
2033                 if (!sharedData.mbcs.hasSupplementary()) {
2034                     cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
2035                 } else {
2036                     cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
2037                 }
2038                 return cr[0];
2039             }
2040 
2041             /* set up the local pointers */
2042             sourceArrayIndex = sourceArrayIndexStart = source.position();
2043 
2044             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
2045                 stateTable = sharedData.mbcs.swapLFNLStateTable;
2046             } else {
2047                 stateTable = sharedData.mbcs.stateTable;
2048             }
2049             unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
2050 
2051             /* get the converter state from UConverter */
2052             offset = toUnicodeStatus;
2053             byteIndex = toULength;
2054             bytes = toUBytesArray;
2055 
2056             /*
2057              * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
2058              * (dbcsOnlyState==0 if it is not a DBCS-only converter)
2059              */
2060             state = (byte)mode;
2061             if (state == 0) {
2062                 state = sharedData.mbcs.dbcsOnlyState;
2063             }
2064 
2065             /* sourceIndex=-1 if the current character began in the previous buffer */
2066             sourceIndex = byteIndex == 0 ? 0 : -1;
2067             nextSourceIndex = 0;
2068 
2069             /* conversion loop */
2070             while (sourceArrayIndex < source.limit()) {
2071                 /*
2072                  * This following test is to see if available input would overflow the output. It does not catch output
2073                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
2074                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
2075                  * too.
2076                  */
2077                 if (!target.hasRemaining()) {
2078                     /* target is full */
2079                     cr[0] = CoderResult.OVERFLOW;
2080                     break;
2081                 }
2082 
2083                 if (byteIndex == 0) {
2084                     /* optimized loop for 1/2-byte input and BMP output */
2085                     // agljport:todo see ucnvmbcs.c for deleted block
2086                     do {
2087                         entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
2088                         if (MBCS_ENTRY_IS_TRANSITION(entry)) {
2089                             state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
2090                             offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
2091                             ++sourceArrayIndex;
2092                             if (sourceArrayIndex < source.limit()
2093                                     && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
2094                                     && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
2095                                     && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
2096                                 ++sourceArrayIndex;
2097                                 target.put(c);
2098                                 if (offsets != null) {
2099                                     offsets.put(sourceIndex);
2100                                     sourceIndex = (nextSourceIndex += 2);
2101                                 }
2102                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
2103                                 offset = 0;
2104                             } else {
2105                                 /* set the state and leave the optimized loop */
/* only the lead byte has been consumed so far; it is saved in bytes[0] for the general code below */
2106                                 ++nextSourceIndex;
2107                                 bytes[0] = source.get(sourceArrayIndex - 1);
2108                                 byteIndex = 1;
2109                                 break;
2110                             }
2111                         } else {
2112                             if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
2113                                 /* output BMP code point */
2114                                 ++sourceArrayIndex;
2115                                 target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2116                                 if (offsets != null) {
2117                                     offsets.put(sourceIndex);
2118                                     sourceIndex = ++nextSourceIndex;
2119                                 }
2120                                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
2121                             } else {
2122                                 /* leave the optimized loop */
2123                                 break;
2124                             }
2125                         }
2126                     } while (sourceArrayIndex < source.limit() && target.hasRemaining());
2127                     /*
2128                      * these tests and break statements could be put inside the loop if C had "break outerLoop" like
2129                      * Java
2130                      */
2131                     if (sourceArrayIndex >= source.limit()) {
2132                         break;
2133                     }
2134                     if (!target.hasRemaining()) {
2135                         /* target is full */
2136                         cr[0] = CoderResult.OVERFLOW;
2137                         break;
2138                     }
2139 
/* consume the byte whose state-table entry was already looked up in the optimized loop */
2140                     ++nextSourceIndex;
2141                     bytes[byteIndex++] = source.get(sourceArrayIndex++);
2142                 } else /* byteIndex>0 */{
2143                     ++nextSourceIndex;
2144                     entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
2145                             & UConverterConstants.UNSIGNED_BYTE_MASK];
2146                 }
2147 
2148                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
2149                     state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
2150                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
2151                     continue;
2152                 }
2153 
2154                 /* save the previous state for proper extension mapping with SI/SO-stateful converters */
2155                 mode = state;
2156 
2157                 /* set the next state early so that we can reuse the entry variable */
2158                 state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
2159 
2160                 /*
2161                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
2162                  * switch.
2163                  */
/* every branch that produces output (or a pure state change) sets byteIndex = 0; byteIndex > 0 afterwards means illegal or unassigned */
2164                 action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
2165                 if (action == MBCS_STATE_VALID_16) {
2166                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
2167                     c = unicodeCodeUnits[offset];
2168                     if (c < 0xfffe) {
2169                         /* output BMP code point */
2170                         target.put(c);
2171                         if (offsets != null) {
2172                             offsets.put(sourceIndex);
2173                         }
2174                         byteIndex = 0;
2175                     } else if (c == 0xfffe) {
2176                         if (isFallbackUsed() && (entry = getFallback(sharedData.mbcs, offset)) != 0xfffe) {
2177                             /* output fallback BMP code point */
2178                             target.put((char)entry);
2179                             if (offsets != null) {
2180                                 offsets.put(sourceIndex);
2181                             }
2182                             byteIndex = 0;
2183                         }
2184                     } else {
2185                         /* callback(illegal) */
2186                         cr[0] = CoderResult.malformedForLength(byteIndex);
2187                     }
2188                 } else if (action == MBCS_STATE_VALID_DIRECT_16) {
2189                     /* output BMP code point */
2190                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2191                     if (offsets != null) {
2192                         offsets.put(sourceIndex);
2193                     }
2194                     byteIndex = 0;
2195                 } else if (action == MBCS_STATE_VALID_16_PAIR) {
2196                     offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
2197                     c = unicodeCodeUnits[offset++];
2198                     if (c < 0xd800) {
2199                         /* output BMP code point below 0xd800 */
2200                         target.put(c);
2201                         if (offsets != null) {
2202                             offsets.put(sourceIndex);
2203                         }
2204                         byteIndex = 0;
2205                     } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
2206                         /* output roundtrip or fallback surrogate pair */
2207                         target.put((char)(c & 0xdbff));
2208                         if (offsets != null) {
2209                             offsets.put(sourceIndex);
2210                         }
2211                         byteIndex = 0;
2212                         if (target.hasRemaining()) {
2213                             target.put(unicodeCodeUnits[offset]);
2214                             if (offsets != null) {
2215                                 offsets.put(sourceIndex);
2216                             }
2217                         } else {
2218                             /* target overflow */
/* the second code unit of the pair is parked in charErrorBufferArray because target has no room for it */
2219                             charErrorBufferArray[0] = unicodeCodeUnits[offset];
2220                             charErrorBufferLength = 1;
2221                             cr[0] = CoderResult.OVERFLOW;
2222 
2223                             offset = 0;
2224                             break;
2225                         }
2226                     } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
2227                         /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
2228                         target.put(unicodeCodeUnits[offset]);
2229                         if (offsets != null) {
2230                             offsets.put(sourceIndex);
2231                         }
2232                         byteIndex = 0;
2233                     } else if (c == 0xffff) {
2234                         /* callback(illegal) */
2235                         cr[0] = CoderResult.malformedForLength(byteIndex);
2236                     }
2237                 } else if (action == MBCS_STATE_VALID_DIRECT_20
2238                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
2239                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
2240                     /* output surrogate pair */
2241                     target.put((char)(0xd800 | (char)(entry >> 10)));
2242                     if (offsets != null) {
2243                         offsets.put(sourceIndex);
2244                     }
2245                     byteIndex = 0;
2246                     c = (char)(0xdc00 | (char)(entry & 0x3ff));
2247                     if (target.hasRemaining()) {
2248                         target.put(c);
2249                         if (offsets != null) {
2250                             offsets.put(sourceIndex);
2251                         }
2252                     } else {
2253                         /* target overflow */
2254                         charErrorBufferArray[0] = c;
2255                         charErrorBufferLength = 1;
2256                         cr[0] = CoderResult.OVERFLOW;
2257 
2258                         offset = 0;
2259                         break;
2260                     }
2261                 } else if (action == MBCS_STATE_CHANGE_ONLY) {
2262                     /*
2263                      * This serves as a state change without any output. It is useful for reading simple stateful
2264                      * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
2265                      * for more sophisticated state transitions.
2266                      */
2267                     if (sharedData.mbcs.dbcsOnlyState == 0) {
2268                         byteIndex = 0;
2269                     } else {
2270                         /* SI/SO are illegal for DBCS-only conversion */
2271                         state = (byte)(mode); /* restore the previous state */
2272 
2273                         /* callback(illegal) */
2274                         cr[0] = CoderResult.malformedForLength(byteIndex);
2275                     }
2276                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
2277                     if (isFallbackUsed()) {
2278                         /* output BMP code point */
2279                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2280                         if (offsets != null) {
2281                             offsets.put(sourceIndex);
2282                         }
2283                         byteIndex = 0;
2284                     }
2285                 } else if (action == MBCS_STATE_UNASSIGNED) {
2286                     /* just fall through */
2287                 } else if (action == MBCS_STATE_ILLEGAL) {
2288                     /* callback(illegal) */
2289                     cr[0] = CoderResult.malformedForLength(byteIndex);
2290                 } else {
2291                     /* reserved, must never occur */
2292                     byteIndex = 0;
2293                 }
2294 
2295                 /* end of action codes: prepare for a new character */
2296                 offset = 0;
2297 
2298                 if (byteIndex == 0) {
2299                     sourceIndex = nextSourceIndex;
2300                 } else if (cr[0].isError()) {
2301                     /* callback(illegal) */
2302                     if (byteIndex > 1) {
2303                         /*
2304                          * Ticket 5691: consistent illegal sequences:
2305                          * - We include at least the first byte in the illegal sequence.
2306                          * - If any of the non-initial bytes could be the start of a character,
2307                          *   we stop the illegal sequence before the first one of those.
2308                          */
2309                         boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
2310                         byte i;
2311                         for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
2312                         if (i < byteIndex) {
2313                             byte backOutDistance = (byte)(byteIndex - i);
2314                             int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
2315                             byteIndex = i; /* length of reported illegal byte sequence */
2316                             if (backOutDistance <= bytesFromThisBuffer) {
2317                                 sourceArrayIndex -= backOutDistance;
2318                             } else {
2319                                 /* Back out bytes from the previous buffer: Need to replay them. */
2320                                 this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
2321                                 /* preToULength is negative! */
2322                                 for (int n = 0; n < -this.preToULength; n++) {
2323                                     this.preToUArray[n] = bytes[i+n];
2324                                 }
2325                                 sourceArrayIndex = sourceArrayIndexStart;
2326                             }
2327                         }
2328                     }
2329                     break;
2330                 } else /* unassigned sequences indicated with byteIndex>0 */{
2331                     /* try an extension mapping */
/* toU() reads from source's own position, so sync it first and read the new position back afterwards */
2332                     int sourceBeginIndex = sourceArrayIndex;
2333                     source.position(sourceArrayIndex);
2334                     byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
2335                     sourceArrayIndex = source.position();
2336                     sourceIndex = nextSourceIndex += (sourceArrayIndex - sourceBeginIndex);
2337 
2338                     if (cr[0].isError() || cr[0].isOverflow()) {
2339                         /* not mappable or buffer overflow */
2340                         break;
2341                     }
2342                 }
2343             }
2344 
2345             /* set the converter state back into UConverter */
2346             toUnicodeStatus = offset;
2347             mode = state;
2348             toULength = byteIndex;
2349 
2350             /* write back the updated pointers */
2351             source.position(sourceArrayIndex);
2352 
2353             return cr[0];
2354         }
2355         /*
2356          * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages that
2357          * only map to and from the BMP. In addition to single-byte optimizations, the offset calculations become much
2358          * easier.
2359          */
cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)2360         private CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
2361                 boolean flush) {
2362             CoderResult[] cr = { CoderResult.UNDERFLOW };
2363 
2364             int sourceArrayIndex, lastSource;
2365             int targetCapacity, length;
2366             int[][] stateTable;
2367 
2368             int sourceIndex;
2369 
2370             int entry;
2371             byte action;
2372 
2373             /* set up the local pointers */
2374             sourceArrayIndex = source.position();
2375             targetCapacity = target.remaining();
2376 
2377             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
2378                 stateTable = sharedData.mbcs.swapLFNLStateTable;
2379             } else {
2380                 stateTable = sharedData.mbcs.stateTable;
2381             }
2382 
2383             /* sourceIndex=-1 if the current character began in the previous buffer */
2384             sourceIndex = 0;
2385             lastSource = sourceArrayIndex;
2386 
2387             /*
2388              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
2389              * sourceLength and targetCapacity
2390              */
2391             length = source.remaining();
2392             if (length < targetCapacity) {
2393                 targetCapacity = length;
2394             }
2395 
2396             /* conversion loop */
2397             while (targetCapacity > 0 && sourceArrayIndex < source.limit()) {
2398                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
2399                 /* MBCS_ENTRY_IS_FINAL(entry) */
2400 
2401                 /* test the most common case first */
2402                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
2403                     /* output BMP code point */
2404                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2405                     --targetCapacity;
2406                     continue;
2407                 }
2408 
2409                 /*
2410                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
2411                  * switch.
2412                  */
2413                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
2414                 if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
2415                     if (isFallbackUsed()) {
2416                         /* output BMP code point */
2417                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2418                         --targetCapacity;
2419                         continue;
2420                     }
2421                 } else if (action == MBCS_STATE_UNASSIGNED) {
2422                     /* just fall through */
2423                 } else if (action == MBCS_STATE_ILLEGAL) {
2424                     /* callback(illegal) */
2425                     cr[0] = CoderResult.malformedForLength(sourceArrayIndex - lastSource);
2426                 } else {
2427                     /* reserved, must never occur */
2428                     continue;
2429                 }
2430 
2431                 /* set offsets since the start or the last extension */
2432                 if (offsets != null) {
2433                     int count = sourceArrayIndex - lastSource;
2434 
2435                     /* predecrement: do not set the offset for the callback-causing character */
2436                     while (--count > 0) {
2437                         offsets.put(sourceIndex++);
2438                     }
2439                     /* offset and sourceIndex are now set for the current character */
2440                 }
2441 
2442                 if (cr[0].isError()) {
2443                     /* callback(illegal) */
2444                     break;
2445                 } else /* unassigned sequences indicated with byteIndex>0 */{
2446                     /* try an extension mapping */
2447                     lastSource = sourceArrayIndex;
2448                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
2449                     source.position(sourceArrayIndex);
2450                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
2451                     sourceArrayIndex = source.position();
2452                     sourceIndex += 1 + (sourceArrayIndex - lastSource);
2453 
2454                     if (cr[0].isError()) {
2455                         /* not mappable or buffer overflow */
2456                         break;
2457                     }
2458 
2459                     /* recalculate the targetCapacity after an extension mapping */
2460                     targetCapacity = target.remaining();
2461                     length = source.remaining();
2462                     if (length < targetCapacity) {
2463                         targetCapacity = length;
2464                     }
2465                 }
2466             }
2467 
2468             if (!cr[0].isError() && sourceArrayIndex < source.limit() && !target.hasRemaining()) {
2469                 /* target is full */
2470                 cr[0] = CoderResult.OVERFLOW;
2471             }
2472 
2473             /* set offsets since the start or the last callback */
2474             if (offsets != null) {
2475                 int count = sourceArrayIndex - lastSource;
2476                 while (count > 0) {
2477                     offsets.put(sourceIndex++);
2478                     --count;
2479                 }
2480             }
2481 
2482             /* write back the updated pointers */
2483             source.position(sourceArrayIndex);
2484 
2485             return cr[0];
2486         }
2487 
2488         /* This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */
cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)2489         private CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets,
2490                 boolean flush) {
2491             CoderResult[] cr = { CoderResult.UNDERFLOW };
2492 
2493             int sourceArrayIndex;
2494             int[][] stateTable;
2495 
2496             int sourceIndex;
2497 
2498             int entry;
2499             char c;
2500             byte action;
2501 
2502             /* set up the local pointers */
2503             sourceArrayIndex = source.position();
2504 
2505             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
2506                 stateTable = sharedData.mbcs.swapLFNLStateTable;
2507             } else {
2508                 stateTable = sharedData.mbcs.stateTable;
2509             }
2510 
2511             /* sourceIndex=-1 if the current character began in the previous buffer */
2512             sourceIndex = 0;
2513 
2514             /* conversion loop */
2515             while (sourceArrayIndex < source.limit()) {
2516                 /*
2517                  * This following test is to see if available input would overflow the output. It does not catch output
2518                  * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
2519                  * last source byte. Therefore, those situations also test for overflows and will then break the loop,
2520                  * too.
2521                  */
2522                 if (!target.hasRemaining()) {
2523                     /* target is full */
2524                     cr[0] = CoderResult.OVERFLOW;
2525                     break;
2526                 }
2527 
2528                 entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
2529                 /* MBCS_ENTRY_IS_FINAL(entry) */
2530 
2531                 /* test the most common case first */
2532                 if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
2533                     /* output BMP code point */
2534                     target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2535                     if (offsets != null) {
2536                         offsets.put(sourceIndex);
2537                     }
2538 
2539                     /* normal end of action codes: prepare for a new character */
2540                     ++sourceIndex;
2541                     continue;
2542                 }
2543 
2544                 /*
2545                  * An if-else-if chain provides more reliable performance for the most common cases compared to a
2546                  * switch.
2547                  */
2548                 action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
2549                 if (action == MBCS_STATE_VALID_DIRECT_20
2550                         || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
2551 
2552                     entry = MBCS_ENTRY_FINAL_VALUE(entry);
2553                     /* output surrogate pair */
2554                     target.put((char) (0xd800 | (char) (entry >>> 10)));
2555                     if (offsets != null) {
2556                         offsets.put(sourceIndex);
2557                     }
2558                     c = (char) (0xdc00 | (char) (entry & 0x3ff));
2559                     if (target.hasRemaining()) {
2560                         target.put(c);
2561                         if (offsets != null) {
2562                             offsets.put(sourceIndex);
2563                         }
2564                     } else {
2565                         /* target overflow */
2566                         charErrorBufferArray[0] = c;
2567                         charErrorBufferLength = 1;
2568                         cr[0] = CoderResult.OVERFLOW;
2569                         break;
2570                     }
2571 
2572                     ++sourceIndex;
2573                     continue;
2574                 } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
2575                     if (isFallbackUsed()) {
2576                         /* output BMP code point */
2577                         target.put(MBCS_ENTRY_FINAL_VALUE_16(entry));
2578                         if (offsets != null) {
2579                             offsets.put(sourceIndex);
2580                         }
2581 
2582                         ++sourceIndex;
2583                         continue;
2584                     }
2585                 } else if (action == MBCS_STATE_UNASSIGNED) {
2586                     /* just fall through */
2587                 } else if (action == MBCS_STATE_ILLEGAL) {
2588                     /* callback(illegal) */
2589                     cr[0] = CoderResult.malformedForLength(1);
2590                 } else {
2591                     /* reserved, must never occur */
2592                     ++sourceIndex;
2593                     continue;
2594                 }
2595 
2596                 if (cr[0].isError()) {
2597                     /* callback(illegal) */
2598                     break;
2599                 } else /* unassigned sequences indicated with byteIndex>0 */{
2600                     /* try an extension mapping */
2601                     int sourceBeginIndex = sourceArrayIndex;
2602                     toUBytesArray[0] = source.get(sourceArrayIndex - 1);
2603                     source.position(sourceArrayIndex);
2604                     toULength = toU((byte) 1, source, target, offsets, sourceIndex, flush, cr);
2605                     sourceArrayIndex = source.position();
2606                     sourceIndex += 1 + (sourceArrayIndex - sourceBeginIndex);
2607 
2608                     if (cr[0].isError()) {
2609                         /* not mappable or buffer overflow */
2610                         break;
2611                     }
2612                 }
2613             }
2614 
2615             /* write back the updated pointers */
2616             source.position(sourceArrayIndex);
2617 
2618             return cr[0];
2619         }
2620 
getFallback(UConverterMBCSTable mbcsTable, int offset)2621         private int getFallback(UConverterMBCSTable mbcsTable, int offset) {
2622             MBCSToUFallback[] toUFallbacks;
2623             int i, start, limit;
2624 
2625             limit = mbcsTable.countToUFallbacks;
2626             if (limit > 0) {
2627                 /* do a binary search for the fallback mapping */
2628                 toUFallbacks = mbcsTable.toUFallbacks;
2629                 start = 0;
2630                 while (start < limit - 1) {
2631                     i = (start + limit) >>> 1;
2632                     if (offset < toUFallbacks[i].offset) {
2633                         limit = i;
2634                     } else {
2635                         start = i;
2636                     }
2637                 }
2638 
2639                 /* did we really find it? */
2640                 if (offset == toUFallbacks[start].offset) {
2641                     return toUFallbacks[start].codePoint;
2642                 }
2643             }
2644 
2645             return 0xfffe;
2646         }
2647 
2648         /**
2649          * This is a simple version of _MBCSGetNextUChar() that is used by other converter implementations. It only
2650          * returns an "assigned" result if it consumes the entire input. It does not use state from the converter, nor
2651          * error codes. It does not handle the EBCDIC swaplfnl option (set in UConverter). It handles conversion
2652          * extensions but not GB 18030.
2653          *
2654          * @return U+fffe unassigned U+ffff illegal otherwise the Unicode code point
2655          */
simpleGetNextUChar(ByteBuffer source, boolean useFallback)2656         int simpleGetNextUChar(ByteBuffer source, boolean useFallback) {
2657 
2658             // #if 0
2659             // /*
2660             // * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
2661             // * TODO In future releases, verify that this function is never called for SBCS
2662             // * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
2663             // * Removal improves code coverage.
2664             // */
2665             // /* use optimized function if possible */
2666             // if(sharedData->mbcs.countStates==1) {
2667             // if(length==1) {
2668             // return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
2669             // } else {
2670             // return 0xffff; /* illegal: more than a single byte for an SBCS converter */
2671             // }
2672             // }
2673             // #endif
2674 
2675             /* set up the local pointers */
2676             int[][] stateTable = sharedData.mbcs.stateTable;
2677             char[] unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
2678 
2679             /* converter state */
2680             int offset = 0;
2681             int state = sharedData.mbcs.dbcsOnlyState;
2682 
2683             int action;
2684             int entry;
2685             int c;
2686             int i = source.position();
2687             int length = source.limit() - i;
2688 
2689             /* conversion loop */
2690             while (true) {
2691                 // entry=stateTable[state][(uint8_t)source[i++]];
2692                 entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];
2693 
2694                 if (MBCS_ENTRY_IS_TRANSITION(entry)) {
2695                     state = MBCS_ENTRY_TRANSITION_STATE(entry);
2696                     offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
2697 
2698                     if (i == source.limit()) {
2699                         return 0xffff; /* truncated character */
2700                     }
2701                 } else {
2702                     /*
2703                      * An if-else-if chain provides more reliable performance for the most common cases compared to a
2704                      * switch.
2705                      */
2706                     action = MBCS_ENTRY_FINAL_ACTION(entry);
2707                     if (action == MBCS_STATE_VALID_16) {
2708                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
2709                         c = unicodeCodeUnits[offset];
2710                         if (c != 0xfffe) {
2711                             /* done */
2712                         } else if (isToUUseFallback()) {
2713                             c = getFallback(sharedData.mbcs, offset);
2714                         }
2715                         /* else done with 0xfffe */
2716                     } else if (action == MBCS_STATE_VALID_DIRECT_16) {
2717                         // /* output BMP code point */
2718                         c = MBCS_ENTRY_FINAL_VALUE_16(entry);
2719                     } else if (action == MBCS_STATE_VALID_16_PAIR) {
2720                         offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
2721                         c = unicodeCodeUnits[offset++];
2722                         if (c < 0xd800) {
2723                             /* output BMP code point below 0xd800 */
2724                         } else if (isToUUseFallback() ? c <= 0xdfff : c <= 0xdbff) {
2725                             /* output roundtrip or fallback supplementary code point */
2726                             c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));
2727                         } else if (isToUUseFallback() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
2728                             /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
2729                             c = unicodeCodeUnits[offset];
2730                         } else if (c == 0xffff) {
2731                             return 0xffff;
2732                         } else {
2733                             c = 0xfffe;
2734                         }
2735                     } else if (action == MBCS_STATE_VALID_DIRECT_20) {
2736                         /* output supplementary code point */
2737                         c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
2738                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
2739                         if (!isToUUseFallback(useFallback)) {
2740                             c = 0xfffe;
2741                         } else {
2742                             /* output BMP code point */
2743                             c = MBCS_ENTRY_FINAL_VALUE_16(entry);
2744                         }
2745                     } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {
2746                         if (!isToUUseFallback(useFallback)) {
2747                             c = 0xfffe;
2748                         } else {
2749                             /* output supplementary code point */
2750                             c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
2751                         }
2752                     } else if (action == MBCS_STATE_UNASSIGNED) {
2753                         c = 0xfffe;
2754                     } else {
2755                         /*
2756                          * forbid MBCS_STATE_CHANGE_ONLY for this function, and MBCS_STATE_ILLEGAL and reserved action
2757                          * codes
2758                          */
2759                         return 0xffff;
2760                     }
2761                     break;
2762                 }
2763             }
2764 
2765             if (i != source.limit()) {
2766                 /* illegal for this function: not all input consumed */
2767                 return 0xffff;
2768             }
2769 
2770             if (c == 0xfffe) {
2771                 /* try an extension mapping */
2772                 if (sharedData.mbcs.extIndexes != null) {
2773                     /* Increase the limit for proper handling. Used in LMBCS. */
2774                     if (source.limit() > i + length) {
2775                         source.limit(i + length);
2776                     }
2777                     return simpleMatchToU(source, useFallback);
2778                 }
2779             }
2780 
2781             return c;
2782         }
hasValidTrailBytes(int[][] stateTable, short state)2783         private boolean hasValidTrailBytes(int[][] stateTable, short state) {
2784             int[] row = stateTable[state];
2785             int b, entry;
2786             /* First test for final entries in this state for some commonly valid byte values. */
2787             entry = row[0xa1];
2788             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
2789                 return true;
2790             }
2791             entry = row[0x41];
2792             if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
2793                 return true;
2794             }
2795             /* Then test for final entries in this state. */
2796             for (b = 0; b <= 0xff; b++) {
2797                 entry = row[b];
2798                 if (!MBCS_ENTRY_IS_TRANSITION(entry) && MBCS_ENTRY_FINAL_ACTION(entry) != MBCS_STATE_ILLEGAL) {
2799                     return true;
2800                 }
2801             }
2802             /* Then recurse for transition entries. */
2803             for (b = 0; b <= 0xff; b++) {
2804                 entry = row[b];
2805                 if (MBCS_ENTRY_IS_TRANSITION(entry) &&
2806                         hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) {
2807                     return true;
2808                 }
2809             }
2810             return false;
2811         }
2812 
isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b)2813         private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
2814             int[] row = stateTable[state];
2815             int entry = row[b];
2816             if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
2817                 return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry));
2818             } else {
2819                 int action = MBCS_ENTRY_FINAL_ACTION(entry);
2820                 if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
2821                     return false;   /* SI/SO are illegal for DBCS-only conversion */
2822                 } else {
2823                     return (action != MBCS_STATE_ILLEGAL);
2824                 }
2825             }
2826         }
2827 
2828 
2829     }
2830 
2831     class CharsetEncoderMBCS extends CharsetEncoderICU {
        /*
         * Flag that is false while the superclass constructor runs (Java applies this initializer only after
         * super(...) returns) and is switched to true by the constructor below. Per the comment at that
         * assignment it gates changes made through implReplaceWith; the check itself is not in this part of
         * the file.
         */
        private boolean allowReplacementChanges = false;

        /**
         * Creates an encoder for the given charset, using the enclosing CharsetMBCS's fromUSubstitution bytes as
         * the second argument of the superclass constructor.
         *
         * @param cs the charset passed on to the superclass constructor
         */
        CharsetEncoderMBCS(CharsetICU cs) {
            super(cs, fromUSubstitution);
            allowReplacementChanges = true; // allow changes in implReplaceWith
            /* start from a clean state; see implReset() of this class */
            implReset();
        }
2839 
        /**
         * Resets this encoder: runs the superclass reset and then sets preFromUFirstCP to
         * UConverterConstants.U_SENTINEL. encodeLoop() only continues an earlier match when preFromUFirstCP is
         * &gt;= 0 (presumably U_SENTINEL is negative and so disables that path; confirm against
         * UConverterConstants).
         */
        @Override
        protected void implReset() {
            super.implReset();
            preFromUFirstCP = UConverterConstants.U_SENTINEL;
        }
2845 
2846         @Override
2847         @SuppressWarnings("fallthrough")
encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)2848         protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
2849             CoderResult[] cr = { CoderResult.UNDERFLOW };
2850             // if (!source.hasRemaining() && fromUChar32 == 0)
2851             // return cr[0];
2852 
2853             int sourceArrayIndex;
2854             char[] table;
2855             byte[] pArray, bytes;
2856             char[] chars;
2857             int[] ints;
2858             int pArrayIndex, outputType, c;
2859             int prevSourceIndex, sourceIndex, nextSourceIndex;
2860             int stage2Entry = 0, value = 0, length = 0, prevLength;
2861             short uniMask;
2862             // long asciiRoundtrips;
2863 
2864             byte[] si_value = new byte[2];
2865             byte[] so_value = new byte[2];
2866             int si_value_length = 0, so_value_length = 0;
2867 
2868             boolean gotoUnassigned = false;
2869 
2870             try {
2871 
2872                 if (!flush && preFromUFirstCP >= 0) {
2873                     /*
2874                      * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change
2875                      * with continuous offsets
2876                      */
2877                     cr[0] = continueMatchFromU(source, target, offsets, flush, -1);
2878 
2879                     if (cr[0].isError() || preFromULength < 0) {
2880                         return cr[0];
2881                     }
2882                 }
2883 
2884                 /* use optimized function if possible */
2885                 outputType = sharedData.mbcs.outputType;
2886                 uniMask = sharedData.mbcs.unicodeMask;
2887                 if (outputType == MBCS_OUTPUT_1 && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
2888                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
2889                         cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);
2890                     } else {
2891                         cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);
2892                     }
2893                     return cr[0];
2894                 } else if (outputType == MBCS_OUTPUT_2) {
2895                     cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);
2896                     return cr[0];
2897                 }
2898 
2899                 table = sharedData.mbcs.fromUnicodeTable;
2900                 int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
2901                 sourceArrayIndex = source.position();
2902 
2903                 bytes = sharedData.mbcs.fromUnicodeBytes;
2904                 ints = sharedData.mbcs.fromUnicodeInts;
2905                 if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
2906                     chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
2907                 } else {
2908                     chars = sharedData.mbcs.fromUnicodeChars;
2909                 }
2910 
2911                 // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
2912 
2913                 /* get the converter state from UConverter */
2914                 c = fromUChar32;
2915 
2916                 if (outputType == MBCS_OUTPUT_2_SISO) {
2917                     prevLength = fromUnicodeStatus;
2918                     if (prevLength == 0) {
2919                         /* set the real value */
2920                         prevLength = 1;
2921                     }
2922                 } else {
2923                     /* prevent fromUnicodeStatus from being set to something non-0 */
2924                     prevLength = 0;
2925                 }
2926 
2927                 /* sourceIndex=-1 if the current character began in the previous buffer */
2928                 prevSourceIndex = -1;
2929                 sourceIndex = c == 0 ? 0 : -1;
2930                 nextSourceIndex = 0;
2931 
2932                 /* Get the SI/SO character for the converter */
2933                 si_value_length = getSISOBytes(SISO_Option.SI, options, si_value);
2934                 so_value_length = getSISOBytes(SISO_Option.SO, options, so_value);
2935 
2936                 /* conversion loop */
2937                 /*
2938                  * This is another piece of ugly code: A goto into the loop if the converter state contains a first
2939                  * surrogate from the previous function call. It saves me to check in each loop iteration a check of
2940                  * if(c==0) and duplicating the trail-surrogate-handling code in the else branch of that check. I could
2941                  * not find any other way to get around this other than using a function call for the conversion and
2942                  * callback, which would be even more inefficient.
2943                  *
2944                  * Markus Scherer 2000-jul-19
2945                  */
2946                 boolean doloop = true;
2947                 boolean doread = true;
2948                 if (c != 0 && target.hasRemaining()) {
2949                     if (UTF16.isLeadSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
2950                         // c is a lead surrogate, read another input
2951                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
2952                                 prevSourceIndex, prevLength);
2953                         doloop = getTrail(source, target, uniMask, x, flush, cr);
2954                         doread = x.doread;
2955                         c = x.c;
2956                         sourceArrayIndex = x.sourceArrayIndex;
2957                         sourceIndex = x.sourceIndex;
2958                         nextSourceIndex = x.nextSourceIndex;
2959                         prevSourceIndex = x.prevSourceIndex;
2960                         prevLength = x.prevLength;
2961                     } else {
2962                         // c is not a lead surrogate, do not read another input
2963                         doread = false;
2964                     }
2965                 }
2966 
2967                 if (doloop) {
2968                     while (!doread || sourceArrayIndex < source.limit()) {
2969                         /*
2970                          * This following test is to see if available input would overflow the output. It does not catch
2971                          * output of more than one byte that overflows as a result of a multi-byte character or callback
2972                          * output from the last source character. Therefore, those situations also test for overflows
2973                          * and will then break the loop, too.
2974                          */
2975                         if (target.hasRemaining()) {
2976                             /*
2977                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
2978                              * surrogate pair for a "supplementary code point".
2979                              */
2980 
2981                             if (doread) {
2982                                 // doread might be false only on the first looping
2983 
2984                                 c = source.get(sourceArrayIndex++);
2985                                 ++nextSourceIndex;
2986 
2987                                 /*
2988                                  * This also tests if the codepage maps single surrogates. If it does, then surrogates
2989                                  * are not paired but mapped separately. Note that in this case unmatched surrogates are
2990                                  * not detected.
2991                                  */
2992                                 if (UTF16.isSurrogate((char) c)
2993                                         && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
2994                                     if (UTF16.isLeadSurrogate((char) c)) {
2995                                         // getTrail:
2996                                         SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex,
2997                                                 nextSourceIndex, prevSourceIndex, prevLength);
2998                                         doloop = getTrail(source, target, uniMask, x, flush, cr);
2999                                         c = x.c;
3000                                         sourceArrayIndex = x.sourceArrayIndex;
3001                                         sourceIndex = x.sourceIndex;
3002                                         nextSourceIndex = x.nextSourceIndex;
3003                                         prevSourceIndex = x.prevSourceIndex;
3004 
3005                                         if (x.doread) {
3006                                             if (doloop)
3007                                                 continue;
3008                                             else
3009                                                 break;
3010                                         }
3011                                     } else {
3012                                         /* this is an unmatched trail code unit (2nd surrogate) */
3013                                         /* callback(illegal) */
3014                                         cr[0] = CoderResult.malformedForLength(1);
3015                                         break;
3016                                     }
3017                                 }
3018                             } else {
3019                                 doread = true;
3020                             }
3021                             /* convert the Unicode code point in c into codepage bytes */
3022 
3023                             /*
3024                              * The basic lookup is a triple-stage compact array (trie) lookup. For details see the
3025                              * beginning of this file.
3026                              *
3027                              * Single-byte codepages are handled with a different data structure by _MBCSSingle...
3028                              * functions.
3029                              *
3030                              * The result consists of a 32-bit value from stage 2 and a pointer to as many bytes as are
3031                              * stored per character. The pointer points to the character's bytes in stage 3. Bits 15..0
3032                              * of the stage 2 entry contain the stage 3 index for that pointer, while bits 31..16 are
3033                              * flags for which of the 16 characters in the block are roundtrip-assigned.
3034                              *
3035                              * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t respectively as
3036                              * uint32_t, in the platform encoding. For 3-byte codepages, the bytes are always stored in
3037                              * big-endian order.
3038                              *
3039                              * For EUC encodings that use only either 0x8e or 0x8f as the first byte of their longest
3040                              * byte sequences, the first two bytes in this third stage indicate with their 7th bits
3041                              * whether these bytes are to be written directly or actually need to be preceded by one of
3042                              * the two Single-Shift codes. With this, the third stage stores one byte fewer per
3043                              * character than the actual maximum length of EUC byte sequences.
3044                              *
3045                              * Other than that, leading zero bytes are removed and the other bytes output. A single zero
3046                              * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
3047                              * support zero byte output as a fallback, and also does not allow output of leading zeros.
3048                              */
3049                             stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
3050 
3051                             /* get the bytes and the length for the output */
3052                             switch (outputType) {
3053                             /* This is handled above with the method cnvMBCSDoubleFromUnicodeWithOffsets() */
3054                             /* case MBCS_OUTPUT_2:
3055                                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
3056                                 if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
3057                                     length = 1;
3058                                 } else {
3059                                     length = 2;
3060                                 }
3061                                 break; */
3062                             case MBCS_OUTPUT_2_SISO:
3063                                 /* 1/2-byte stateful with Shift-In/Shift-Out */
3064                                 /*
3065                                  * Save the old state in the converter object right here, then change the local
3066                                  * prevLength state variable if necessary. Then, if this character turns out to be
3067                                  * unassigned or a fallback that is not taken, the callback code must not save the new
3068                                  * state in the converter because the new state is for a character that is not output.
3069                                  * However, the callback must still restore the state from the converter in case the
3070                                  * callback function changed it for its output.
3071                                  */
3072                                 fromUnicodeStatus = prevLength; /* save the old state */
3073                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
3074                                 if (value <= 0xff) {
3075                                     if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
3076                                         /* no mapping, leave value==0 */
3077                                         length = 0;
3078                                     } else if (prevLength <= 1) {
3079                                         length = 1;
3080                                     } else {
3081                                         /* change from double-byte mode to single-byte */
3082                                         if (si_value_length == 1) {
3083                                             value|=si_value[0]<<8;
3084                                             length = 2;
3085                                         } else if (si_value_length == 2) {
3086                                             value|=si_value[1]<<8;
3087                                             value|=si_value[0]<<16;
3088                                             length = 3;
3089                                         }
3090                                         prevLength = 1;
3091                                     }
3092                                 } else {
3093                                     if (prevLength == 2) {
3094                                         length = 2;
3095                                     } else {
3096                                         /* change from single-byte mode to double-byte */
3097                                         if (so_value_length == 1) {
3098                                             value|=so_value[0]<<16;
3099                                             length = 3;
3100                                         } else if (so_value_length == 2) {
3101                                             value|=so_value[1]<<16;
3102                                             value|=so_value[0]<<24;
3103                                             length = 4;
3104                                         }
3105                                         prevLength = 2;
3106                                     }
3107                                 }
3108                                 break;
3109                             case MBCS_OUTPUT_DBCS_ONLY:
3110                                 /* table with single-byte results, but only DBCS mappings used */
3111                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
3112                                 if (value <= 0xff) {
3113                                     /* no mapping or SBCS result, not taken for DBCS-only */
3114                                     value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
3115                                     length = 0;
3116                                 } else {
3117                                     length = 2;
3118                                 }
3119                                 break;
3120                             case MBCS_OUTPUT_3:
3121                                 pArray = bytes;
3122                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
3123                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
3124                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
3125                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
3126                                 if (value <= 0xff) {
3127                                     length = 1;
3128                                 } else if (value <= 0xffff) {
3129                                     length = 2;
3130                                 } else {
3131                                     length = 3;
3132                                 }
3133                                 break;
3134                             case MBCS_OUTPUT_4:
3135                                 value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c);
3136                                 if (value < 0) {
3137                                     // Half of the 4-byte values look negative in a signed int.
3138                                     length = 4;
3139                                 } else if (value <= 0xff) {
3140                                     length = 1;
3141                                 } else if (value <= 0xffff) {
3142                                     length = 2;
3143                                 } else if (value <= 0xffffff) {
3144                                     length = 3;
3145                                 } else {
3146                                     length = 4;
3147                                 }
3148                                 break;
3149                             case MBCS_OUTPUT_3_EUC:
3150                                 value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
3151                                 /* EUC 16-bit fixed-length representation */
3152                                 if (value <= 0xff) {
3153                                     length = 1;
3154                                 } else if ((value & 0x8000) == 0) {
3155                                     value |= 0x8e8000;
3156                                     length = 3;
3157                                 } else if ((value & 0x80) == 0) {
3158                                     value |= 0x8f0080;
3159                                     length = 3;
3160                                 } else {
3161                                     length = 2;
3162                                 }
3163                                 break;
3164                             case MBCS_OUTPUT_4_EUC:
3165                                 pArray = bytes;
3166                                 pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
3167                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
3168                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
3169                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
3170                                 /* EUC 16-bit fixed-length representation applied to the first two bytes */
3171                                 if (value <= 0xff) {
3172                                     length = 1;
3173                                 } else if (value <= 0xffff) {
3174                                     length = 2;
3175                                 } else if ((value & 0x800000) == 0) {
3176                                     value |= 0x8e800000;
3177                                     length = 4;
3178                                 } else if ((value & 0x8000) == 0) {
3179                                     value |= 0x8f008000;
3180                                     length = 4;
3181                                 } else {
3182                                     length = 3;
3183                                 }
3184                                 break;
3185                             default:
3186                                 /* must not occur */
3187                                 /*
3188                                  * To avoid compiler warnings that value & length may be used without having been
3189                                  * initialized, we set them here. In reality, this is unreachable code. Not having a
3190                                  * default branch also causes warnings with some compilers.
3191                                  */
3192                                 value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
3193                                 length = 0;
3194                                 break;
3195                             }
3196 
3197                             /* is this code point assigned, or do we use fallbacks? */
3198                             if (gotoUnassigned || (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0)))) {
3199                                 gotoUnassigned = false;
3200                                 /*
3201                                  * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
3202                                  * with this data structure for fallback output to be a zero byte.
3203                                  */
3204 
3205                                 // unassigned:
3206                                 SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex,
3207                                         prevSourceIndex, prevLength);
3208                                 doloop = unassigned(source, target, offsets, x, flush, cr);
3209                                 c = x.c;
3210                                 sourceArrayIndex = x.sourceArrayIndex;
3211                                 sourceIndex = x.sourceIndex;
3212                                 nextSourceIndex = x.nextSourceIndex;
3213                                 prevSourceIndex = x.prevSourceIndex;
3214                                 prevLength = x.prevLength;
3215                                 if (doloop)
3216                                     continue;
3217                                 else
3218                                     break;
3219                             }
3220 
3221                             /* write the output character bytes from value and length */
3222                             /* from the first if in the loop we know that targetCapacity>0 */
3223                             if (length <= target.remaining()) {
3224                                 switch (length) {
3225                                 /* each branch falls through to the next one */
3226                                 case 4:
3227                                     target.put((byte) (value >>> 24));
3228                                     if (offsets != null) {
3229                                         offsets.put(sourceIndex);
3230                                     }
3231                                 case 3:
3232                                     target.put((byte) (value >>> 16));
3233                                     if (offsets != null) {
3234                                         offsets.put(sourceIndex);
3235                                     }
3236                                 case 2:
3237                                     target.put((byte) (value >>> 8));
3238                                     if (offsets != null) {
3239                                         offsets.put(sourceIndex);
3240                                     }
3241                                 case 1:
3242                                     target.put((byte) value);
3243                                     if (offsets != null) {
3244                                         offsets.put(sourceIndex);
3245                                     }
3246                                 default:
3247                                     /* will never occur */
3248                                     break;
3249                                 }
3250                             } else {
3251                                 int errorBufferArrayIndex;
3252 
3253                                 /*
3254                                  * We actually do this backwards here: In order to save an intermediate variable, we
3255                                  * output first to the overflow buffer what does not fit into the regular target.
3256                                  */
3257                                 /* we know that 1<=targetCapacity<length<=4 */
3258                                 length -= target.remaining();
3259 
3260                                 errorBufferArrayIndex = 0;
3261                                 switch (length) {
3262                                 /* each branch falls through to the next one */
3263                                 case 3:
3264                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 16);
3265                                 case 2:
3266                                     errorBuffer[errorBufferArrayIndex++] = (byte) (value >>> 8);
3267                                 case 1:
3268                                     errorBuffer[errorBufferArrayIndex] = (byte) value;
3269                                 default:
3270                                     /* will never occur */
3271                                     break;
3272                                 }
3273                                 errorBufferLength = (byte) length;
3274 
3275                                 /* now output what fits into the regular target */
3276                                 value >>>= 8 * length; /* length was reduced by targetCapacity */
3277                                 switch (target.remaining()) {
3278                                 /* each branch falls through to the next one */
3279                                 case 3:
3280                                     target.put((byte) (value >>> 16));
3281                                     if (offsets != null) {
3282                                         offsets.put(sourceIndex);
3283                                     }
3284                                 case 2:
3285                                     target.put((byte) (value >>> 8));
3286                                     if (offsets != null) {
3287                                         offsets.put(sourceIndex);
3288                                     }
3289                                 case 1:
3290                                     target.put((byte) value);
3291                                     if (offsets != null) {
3292                                         offsets.put(sourceIndex);
3293                                     }
3294                                 default:
3295                                     /* will never occur */
3296                                     break;
3297                                 }
3298 
3299                                 /* target overflow */
3300                                 cr[0] = CoderResult.OVERFLOW;
3301                                 c = 0;
3302                                 break;
3303                             }
3304 
3305                             /* normal end of conversion: prepare for a new character */
3306                             c = 0;
3307                             if (offsets != null) {
3308                                 prevSourceIndex = sourceIndex;
3309                                 sourceIndex = nextSourceIndex;
3310                             }
3311                             continue;
3312                         } else {
3313                             /* target is full */
3314                             cr[0] = CoderResult.OVERFLOW;
3315                             break;
3316                         }
3317                     }
3318                 }
3319 
3320                 /*
3321                  * the end of the input stream and detection of truncated input are handled by the framework, but for
3322                  * EBCDIC_STATEFUL conversion we need to emit an SI at the very end
3323                  *
3324                  * conditions: successful EBCDIC_STATEFUL in DBCS mode end of input and no truncated input
3325                  */
3326                 if (outputType == MBCS_OUTPUT_2_SISO && prevLength == 2 && flush && sourceArrayIndex >= source.limit()
3327                         && c == 0) {
3328 
3329                     /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
3330                     if (target.hasRemaining()) {
3331                         target.put(si_value[0]);
3332                         if (si_value_length == 2) {
3333                             if (target.remaining() > 0) {
3334                                 target.put(si_value[1]);
3335                             } else {
3336                                 errorBuffer[0] = si_value[1];
3337                                 errorBufferLength = 1;
3338                                 cr[0] = CoderResult.OVERFLOW;
3339                             }
3340                         }
3341                         if (offsets != null) {
3342                             /* set the last source character's index (sourceIndex points at sourceLimit now) */
3343                             offsets.put(prevSourceIndex);
3344                         }
3345                     } else {
3346                         /* target is full */
3347                         errorBuffer[0] = si_value[0];
3348                         if (si_value_length == 2) {
3349                             errorBuffer[1] = si_value[1];
3350                         }
3351                         errorBufferLength = si_value_length;
3352                         cr[0] = CoderResult.OVERFLOW;
3353                     }
3354                     prevLength = 1; /* we switched into SBCS */
3355                 }
3356 
3357                 /* set the converter state back into UConverter */
3358                 fromUChar32 = c;
3359                 fromUnicodeStatus = prevLength;
3360 
3361                 source.position(sourceArrayIndex);
3362             } catch (BufferOverflowException ex) {
3363                 cr[0] = CoderResult.OVERFLOW;
3364             }
3365 
3366             return cr[0];
3367         }
3368 
/*
 * This is another simple conversion function for internal use by other conversion implementations. It does
 * not use the converter state nor call callbacks. It does not handle the EBCDIC swaplfnl option (set in
 * UConverter). It handles conversion extensions but not GB 18030.
 *
 * It converts one single Unicode code point into codepage bytes, encoded as one 32-bit value.
 *
 * The function returns the number of bytes stored in pValue[0]:
 *   1..4  the number of bytes in pValue[0]
 *   0     unassigned (pValue[0] undefined)
 *   -1    illegal (pValue[0] undefined); returned only when the converter's output type is not one of the
 *         types handled by this function
 *
 * pValue[0] will contain the resulting bytes with the last byte in bits 7..0, the second to last byte in
 * bits 15..8, etc. Currently, the function assumes but does not check that 0<=c<=0x10ffff.
 */
fromUChar32(int c, int[] pValue, boolean isUseFallback)3381         int fromUChar32(int c, int[] pValue, boolean isUseFallback) {
3382             // #if 0
3383             // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
3384             // const uint8_t *p;
3385             // #endif
3386 
3387             char[] table;
3388             int stage2Entry;
3389             int value;
3390             int length;
3391             int p;
3392 
3393             /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
3394             if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) {
3395                 table = sharedData.mbcs.fromUnicodeTable;
3396 
3397                 /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
3398                 if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
3399                     value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
3400                     /* is this code point assigned, or do we use fallbacks? */
3401                     if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
3402                         pValue[0] = value & 0xff;
3403                         return 1;
3404                     }
3405                 } else /* outputType!=MBCS_OUTPUT_1 */{
3406                     int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
3407                     stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
3408 
3409                     /* get the bytes and the length for the output */
3410                     switch (sharedData.mbcs.outputType) {
3411                     case MBCS_OUTPUT_2:
3412                         value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c);
3413                         if (value <= 0xff) {
3414                             length = 1;
3415                         } else {
3416                             length = 2;
3417                         }
3418                         break;
3419                     // #if 0
3420                     // /* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
3421                     // case MBCS_OUTPUT_DBCS_ONLY:
3422                     // /* table with single-byte results, but only DBCS mappings used */
3423                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
3424                     // if(value<=0xff) {
3425                     // /* no mapping or SBCS result, not taken for DBCS-only */
3426                     // value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
3427                     // length=0;
3428                     // } else {
3429                     // length=2;
3430                     // }
3431                     // break;
3432                     case MBCS_OUTPUT_3:
3433                         byte[] bytes = sharedData.mbcs.fromUnicodeBytes;
3434                         p = CharsetMBCS.MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
3435                         value = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |
3436                             ((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |
3437                             (bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);
3438                         if (value <= 0xff) {
3439                             length = 1;
3440                         } else if (value <= 0xffff) {
3441                             length = 2;
3442                         } else {
3443                             length = 3;
3444                         }
3445                         break;
3446                     // case MBCS_OUTPUT_4:
3447                     // value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
3448                     // if(value<=0xff) {
3449                     // length=1;
3450                     // } else if(value<=0xffff) {
3451                     // length=2;
3452                     // } else if(value<=0xffffff) {
3453                     // length=3;
3454                     // } else {
3455                     // length=4;
3456                     // }
3457                     // break;
3458                     // case MBCS_OUTPUT_3_EUC:
3459                     // value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
3460                     // /* EUC 16-bit fixed-length representation */
3461                     // if(value<=0xff) {
3462                     // length=1;
3463                     // } else if((value&0x8000)==0) {
3464                     // value|=0x8e8000;
3465                     // length=3;
3466                     // } else if((value&0x80)==0) {
3467                     // value|=0x8f0080;
3468                     // length=3;
3469                     // } else {
3470                     // length=2;
3471                     // }
3472                     // break;
3473                     // case MBCS_OUTPUT_4_EUC:
3474                     // p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
3475                     // value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
3476                     // /* EUC 16-bit fixed-length representation applied to the first two bytes */
3477                     // if(value<=0xff) {
3478                     // length=1;
3479                     // } else if(value<=0xffff) {
3480                     // length=2;
3481                     // } else if((value&0x800000)==0) {
3482                     // value|=0x8e800000;
3483                     // length=4;
3484                     // } else if((value&0x8000)==0) {
3485                     // value|=0x8f008000;
3486                     // length=4;
3487                     // } else {
3488                     // length=3;
3489                     // }
3490                     // break;
3491                     // #endif
3492                     default:
3493                         /* must not occur */
3494                         return -1;
3495                     }
3496 
3497                     /* is this code point assigned, or do we use fallbacks? */
3498                     if (MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
3499                             || (CharsetEncoderICU.isFromUUseFallback(isUseFallback, c) && value != 0)) {
3500                         /*
3501                          * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way with
3502                          * this data structure for fallback output to be a zero byte.
3503                          */
3504                         /* assigned */
3505                         pValue[0] = value;
3506                         return length;
3507                     }
3508                 }
3509             }
3510 
3511             if (sharedData.mbcs.extIndexes != null) {
3512                 length = simpleMatchFromU(c, pValue, isUseFallback);
3513                 return length >= 0 ? length : -length; /* return abs(length); */
3514             }
3515 
3516             /* unassigned */
3517             return 0;
3518         }
3519 
/*
 * Continue a partial match with new input. Requires preFromUFirstCP >= 0.
 * Never called for simple, single-character conversion.
 */
continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush, int srcIndex)3524         private CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush,
3525                 int srcIndex) {
3526             CoderResult cr = CoderResult.UNDERFLOW;
3527             int[] value = new int[1];
3528             int match;
3529 
3530             match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, value, useFallback, flush);
3531             if (match >= 2) {
3532                 match -= 2; /* remove 2 for the initial code point */
3533 
3534                 if (match >= preFromULength) {
3535                     /* advance src pointer for the consumed input */
3536                     source.position(source.position() + match - preFromULength);
3537                     preFromULength = 0;
3538                 } else {
3539                     /* the match did not use all of preFromU[] - keep the rest for replay */
3540                     int length = preFromULength - match;
3541                     System.arraycopy(preFromUArray, preFromUBegin + match, preFromUArray, preFromUBegin, length);
3542                     preFromULength = (byte) -length;
3543                 }
3544 
3545                 /* finish the partial match */
3546                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
3547 
3548                 /* write result */
3549                 writeFromU(value[0], target, offsets, srcIndex);
3550             } else if (match < 0) {
3551                 /* save state for partial match */
3552                 int sArrayIndex;
3553                 int j;
3554 
3555                 /* just _append_ the newly consumed input to preFromU[] */
3556                 sArrayIndex = source.position();
3557                 match = -match - 2; /* remove 2 for the initial code point */
3558                 for (j = preFromULength; j < match; ++j) {
3559                     preFromUArray[j] = source.get(sArrayIndex++);
3560                 }
3561                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
3562                 preFromULength = (byte) match;
3563             } else { /* match==0 or 1 */
3564                 /*
3565                  * no match
3566                  *
3567                  * We need to split the previous input into two parts:
3568                  *
3569                  * 1. The first code point is unmappable - that's how we got into trying the extension data in the first
3570                  * place. We need to move it from the preFromU buffer to the error buffer, set an error code, and
3571                  * prepare the rest of the previous input for 2.
3572                  *
3573                  * 2. The rest of the previous input must be converted once we come back from the callback for the first
3574                  * code point. At that time, we have to try again from scratch to convert these input characters. The
3575                  * replay will be handled by the ucnv.c conversion code.
3576                  */
3577 
3578                 if (match == 1) {
3579                     /* matched, no mapping but request for <subchar1> */
3580                     useSubChar1 = true;
3581                 }
3582 
3583                 /* move the first code point to the error field */
3584                 fromUChar32 = preFromUFirstCP;
3585                 preFromUFirstCP = UConverterConstants.U_SENTINEL;
3586 
3587                 /* mark preFromU for replay */
3588                 preFromULength = (byte) -preFromULength;
3589 
3590                 /* set the error code for unassigned */
3591                 // TODO: figure out what the unmappable length really should be
3592                 cr = CoderResult.unmappableForLength(1);
3593             }
3594             return cr;
3595         }
3596 
/**
 * Matches firstCP, followed by UTF-16 code units from preArray and then from source, against the
 * from-Unicode extension table. The extension data is read from sharedData.mbcs.extIndexes; if that is
 * null, the method returns 0.
 *
 * @param firstCP
 *            the first code point before all the other UChars
 * @param preArray
 *            UChars that must match; !initialMatch: partial match with them (may be null when preLength is 0)
 * @param preArrayBegin
 *            index in preArray of the first UChar to match
 * @param preLength
 *            number of UChars to match from preArray, >=0
 * @param source
 *            UChars that can be used to complete a match, read from the buffer's current position without
 *            advancing it; may be null if there is no further input
 * @param pMatchValue
 *            [out] pMatchValue[0] receives the result value for the match from the data structure
 * @param isUseFallback
 *            "use fallback" flag, usually the converter's useFallback setting
 * @param flush
 *            true if the end of the input stream is reached
 * @return >1: matched, return value=total match length (number of input units matched);
 *         1: matched, no mapping but request for &lt;subchar1&gt; (only for the first code point);
 *         0: no match;
 *         &lt;0: partial match, return value=negative total match length (partial matches are never returned
 *         for flush==true, and never returned as being longer than MAX_UCHARS).
 *         The matchLength is 2 if only firstCP matched, and >2 if firstCP and further code units matched.
 */
3622         // static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength,
3623         // const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)
matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source, int[] pMatchValue, boolean isUseFallback, boolean flush)3624         private int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source,
3625                 int[] pMatchValue, boolean isUseFallback, boolean flush) {
3626             ByteBuffer cx = sharedData.mbcs.extIndexes;
3627 
3628             CharBuffer stage12, stage3;
3629             IntBuffer stage3b;
3630 
3631             CharBuffer fromUTableUChars, fromUSectionUChars;
3632             IntBuffer fromUTableValues, fromUSectionValues;
3633 
3634             int value, matchValue;
3635             int i, j, index, length, matchLength;
3636             char c;
3637 
3638             if (cx == null) {
3639                 return 0; /* no extension data, no match */
3640             }
3641 
3642             /* trie lookup of firstCP */
3643             index = firstCP >>> 10; /* stage 1 index */
3644             if (index >= cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {
3645                 return 0; /* the first code point is outside the trie */
3646             }
3647 
3648             stage12 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
3649             stage3 = (CharBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
3650             index = FROM_U(stage12, stage3, index, firstCP);
3651 
3652             stage3b = (IntBuffer) ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
3653             value = stage3b.get(stage3b.position() + index);
3654             if (value == 0) {
3655                 return 0;
3656             }
3657 
3658             if (TO_U_IS_PARTIAL(value)) {
3659                 /* partial match, enter the loop below */
3660                 index = FROM_U_GET_PARTIAL_INDEX(value);
3661 
3662                 /* initialize */
3663                 fromUTableUChars = (CharBuffer) ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
3664                 fromUTableValues = (IntBuffer) ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
3665 
3666                 matchValue = 0;
3667                 i = j = matchLength = 0;
3668 
3669                 /* we must not remember fallback matches when not using fallbacks */
3670 
3671                 /* match input units until there is a full match or the input is consumed */
3672                 for (;;) {
3673                     /* go to the next section */
3674                     int oldpos = fromUTableUChars.position();
3675                     fromUSectionUChars = ((CharBuffer) fromUTableUChars.position(index)).slice();
3676                     fromUTableUChars.position(oldpos);
3677                     oldpos = fromUTableValues.position();
3678                     fromUSectionValues = ((IntBuffer) fromUTableValues.position(index)).slice();
3679                     fromUTableValues.position(oldpos);
3680 
3681                     /* read first pair of the section */
3682                     length = fromUSectionUChars.get();
3683                     value = fromUSectionValues.get();
3684                     if (value != 0 && (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP))) {
3685                         /* remember longest match so far */
3686                         matchValue = value;
3687                         matchLength = 2 + i + j;
3688                     }
3689 
3690                     /* match pre[] then src[] */
3691                     if (i < preLength) {
3692                         c = preArray[preArrayBegin + i++];
3693                     } else if (source != null && j < source.remaining()) {
3694                         c = source.get(source.position() + j++);
3695                     } else {
3696                         /* all input consumed, partial match */
3697                         if (flush || (length = (i + j)) > MAX_UCHARS) {
3698                             /*
3699                              * end of the entire input stream, stop with the longest match so far or: partial match must
3700                              * not be longer than UCNV_EXT_MAX_UCHARS because it must fit into state buffers
3701                              */
3702                             break;
3703                         } else {
3704                             /* continue with more input next time */
3705                             return -(2 + length);
3706                         }
3707                     }
3708 
3709                     /* search for the current UChar */
3710                     index = findFromU(fromUSectionUChars, length, c);
3711                     if (index < 0) {
3712                         /* no match here, stop with the longest match so far */
3713                         break;
3714                     } else {
3715                         value = fromUSectionValues.get(fromUSectionValues.position() + index);
3716                         if (FROM_U_IS_PARTIAL(value)) {
3717                             /* partial match, continue */
3718                             index = FROM_U_GET_PARTIAL_INDEX(value);
3719                         } else {
3720                             if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
3721                                 /* full match, stop with result */
3722                                 matchValue = value;
3723                                 matchLength = 2 + i + j;
3724                             } else {
3725                                 /* full match on fallback not taken, stop with the longest match so far */
3726                             }
3727                             break;
3728                         }
3729                     }
3730                 }
3731 
3732                 if (matchLength == 0) {
3733                     /* no match at all */
3734                     return 0;
3735                 }
3736             } else /* result from firstCP trie lookup */{
3737                 if (FROM_U_IS_ROUNDTRIP(value) || isFromUUseFallback(isUseFallback, firstCP)) {
3738                     /* full match, stop with result */
3739                     matchValue = value;
3740                     matchLength = 2;
3741                 } else {
3742                     /* fallback not taken */
3743                     return 0;
3744                 }
3745             }
3746 
3747             if ((matchValue & FROM_U_RESERVED_MASK) != 0) {
3748                 /* do not interpret values with reserved bits used, for forward compatibility */
3749                 return 0;
3750             }
3751 
3752             /* return result */
3753             if (matchValue == FROM_U_SUBCHAR1) {
3754                 return 1; /* assert matchLength==2 */
3755             }
3756 
3757             pMatchValue[0] = FROM_U_MASK_ROUNDTRIP(matchValue);
3758             return matchLength;
3759         }
3760 
simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback)3761         private int simpleMatchFromU(int cp, int[] pValue, boolean isUseFallback) {
3762             int[] value = new int[1];
3763             int match; // signed
3764 
3765             /* try to match */
3766             match = matchFromU(cp, null, 0, 0, null, value, isUseFallback, true);
3767             if (match >= 2) {
3768                 /* write result for simple, single-character conversion */
3769                 int length;
3770                 boolean isRoundtrip;
3771 
3772                 isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);
3773                 length = FROM_U_GET_LENGTH(value[0]);
3774                 value[0] = FROM_U_GET_DATA(value[0]);
3775 
3776                 if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {
3777                     pValue[0] = value[0];
3778                     return isRoundtrip ? length : -length;
3779                     // #if 0 /* not currently used */
3780                     // } else if(length==4) {
3781                     // /* de-serialize a 4-byte result */
3782                     // const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
3783                     // *pValue=
3784                     // ((uint32_t)result[0]<<24)|
3785                     // ((uint32_t)result[1]<<16)|
3786                     // ((uint32_t)result[2]<<8)|
3787                     // result[3];
3788                     // return isRoundtrip ? 4 : -4;
3789                     // #endif
3790                 }
3791             }
3792 
3793             /*
3794              * return no match because - match>1 && resultLength>4: result too long for simple conversion - match==1: no
3795              * match found, <subchar1> preferred - match==0: no match found in the first place - match<0: partial
3796              * match, not supported for simple conversion (and flush==TRUE)
3797              */
3798             return 0;
3799         }
3800 
3801         @SuppressWarnings("fallthrough")
writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex)3802         private CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex) {
3803             ByteBuffer cx = sharedData.mbcs.extIndexes;
3804 
3805             byte bufferArray[] = new byte[1 + MAX_BYTES];
3806             int bufferArrayIndex = 0;
3807             byte[] resultArray;
3808             int resultArrayIndex;
3809             int length, prevLength;
3810 
3811             length = FROM_U_GET_LENGTH(value);
3812             value = FROM_U_GET_DATA(value);
3813 
3814             /* output the result */
3815             if (length <= FROM_U_MAX_DIRECT_LENGTH) {
3816                 /*
3817                  * Generate a byte array and then write it below. This is not the fastest possible way, but it should be
3818                  * ok for extension mappings, and it is much simpler. Offset and overflow handling are only done once
3819                  * this way.
3820                  */
3821                 int p = bufferArrayIndex + 1; /* reserve buffer[0] for shiftByte below */
3822                 switch (length) {
3823                 case 3:
3824                     bufferArray[p++] = (byte) (value >>> 16);
3825                 case 2:
3826                     bufferArray[p++] = (byte) (value >>> 8);
3827                 case 1:
3828                     bufferArray[p++] = (byte) value;
3829                 default:
3830                     break; /* will never occur */
3831                 }
3832                 resultArray = bufferArray;
3833                 resultArrayIndex = bufferArrayIndex + 1;
3834             } else {
3835                 byte[] slice = new byte[length];
3836 
3837                 ByteBuffer bb = ((ByteBuffer) ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));
3838                 bb.position(value);
3839                 bb.get(slice, 0, slice.length);
3840 
3841                 resultArray = slice;
3842                 resultArrayIndex = 0;
3843             }
3844 
3845             /* with correct data we have length>0 */
3846 
3847             if ((prevLength = fromUnicodeStatus) != 0) {
3848                 /* handle SI/SO stateful output */
3849                 byte shiftByte;
3850 
3851                 if (prevLength > 1 && length == 1) {
3852                     /* change from double-byte mode to single-byte */
3853                     shiftByte = (byte) UConverterConstants.SI;
3854                     fromUnicodeStatus = 1;
3855                 } else if (prevLength == 1 && length > 1) {
3856                     /* change from single-byte mode to double-byte */
3857                     shiftByte = (byte) UConverterConstants.SO;
3858                     fromUnicodeStatus = 2;
3859                 } else {
3860                     shiftByte = 0;
3861                 }
3862 
3863                 if (shiftByte != 0) {
3864                     /* prepend the shift byte to the result bytes */
3865                     bufferArray[0] = shiftByte;
3866                     if (resultArray != bufferArray || resultArrayIndex != bufferArrayIndex + 1) {
3867                         System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex + 1, length);
3868                     }
3869                     resultArray = bufferArray;
3870                     resultArrayIndex = bufferArrayIndex;
3871                     ++length;
3872                 }
3873             }
3874 
3875             return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);
3876         }
3877 
3878         /*
3879          * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
3880          * to the target
3881          */
fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex, int length, boolean flush, CoderResult[] cr)3882         private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
3883                 int length, boolean flush, CoderResult[] cr) {
3884             // ByteBuffer cx;
3885 
3886             useSubChar1 = false;
3887 
3888             if (sharedData.mbcs.extIndexes != null
3889                     && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) {
3890                 return 0; /* an extension mapping handled the input */
3891             }
3892 
3893             /* GB 18030 */
3894             if ((options & MBCS_OPTION_GB18030) != 0) {
3895                 int[] range;
3896                 int i;
3897 
3898                 for (i = 0; i < gb18030Ranges.length; ++i) {
3899                     range = gb18030Ranges[i];
3900                     if (range[0] <= cp && cp <= range[1]) {
3901                         /* found the Unicode code point, output the four-byte sequence for it */
3902                         int linear;
3903                         byte bytes[] = new byte[4];
3904 
3905                         /* get the linear value of the first GB 18030 code in this range */
3906                         linear = range[2] - LINEAR_18030_BASE;
3907 
3908                         /* add the offset from the beginning of the range */
3909                         linear += (cp - range[0]);
3910 
3911                         bytes[3] = (byte) (0x30 + linear % 10);
3912                         linear /= 10;
3913                         bytes[2] = (byte) (0x81 + linear % 126);
3914                         linear /= 126;
3915                         bytes[1] = (byte) (0x30 + linear % 10);
3916                         linear /= 10;
3917                         bytes[0] = (byte) (0x81 + linear);
3918 
3919                         /* output this sequence */
3920                         cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);
3921                         return 0;
3922                     }
3923                 }
3924             }
3925 
3926             /* no mapping */
3927             cr[0] = CoderResult.unmappableForLength(length);
3928             return cp;
3929         }
3930 
3931         /*
3932          * target<targetLimit; set error code for overflow
3933          */
initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int srcIndex, boolean flush, CoderResult[] cr)3934         private boolean initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets,
3935                 int srcIndex, boolean flush, CoderResult[] cr) {
3936             int[] value = new int[1];
3937             int match;
3938 
3939             /* try to match */
3940             match = matchFromU(cp, null, 0, 0, source, value, useFallback, flush);
3941 
3942             /* reject a match if the result is a single byte for DBCS-only */
3943             if (match >= 2
3944                     && !(FROM_U_GET_LENGTH(value[0]) == 1 && sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY)) {
3945                 /* advance src pointer for the consumed input */
3946                 source.position(source.position() + match - 2); /* remove 2 for the initial code point */
3947 
3948                 /* write result to target */
3949                 cr[0] = writeFromU(value[0], target, offsets, srcIndex);
3950                 return true;
3951             } else if (match < 0) {
3952                 /* save state for partial match */
3953                 int sArrayIndex;
3954                 int j;
3955 
3956                 /* copy the first code point */
3957                 preFromUFirstCP = cp;
3958 
3959                 /* now copy the newly consumed input */
3960                 sArrayIndex = source.position();
3961                 match = -match - 2; /* remove 2 for the initial code point */
3962                 for (j = 0; j < match; ++j) {
3963                     preFromUArray[j] = source.get(sArrayIndex++);
3964                 }
3965                 source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
3966                 preFromULength = (byte) match;
3967                 return true;
3968             } else if (match == 1) {
3969                 /* matched, no mapping but request for <subchar1> */
3970                 useSubChar1 = true;
3971                 return false;
3972             } else /* match==0 no match */{
3973                 return false;
3974             }
3975         }
3976 
cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)3977         CoderResult cnvMBCSFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
3978             // Just call encodeLoop to remove duplicate code.
3979             return encodeLoop(source, target, offsets, flush);
3980         }
3981 
3982         /*
3983          * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages that map only to and from the
3984          * BMP. In addition to single-byte/state optimizations, the offset calculations become much easier.
              *
              * Each loop iteration reads one UTF-16 code unit and, on the fast path, writes exactly one byte for it;
              * the loop counter is therefore the minimum of the remaining source units and the remaining target bytes.
              * Offsets are not written per byte on the fast path: they are filled in as a run of consecutive source
              * indexes right before an extension lookup and once more after the loop.
              *
              * Converter state: fromUChar32 is read on entry and the current value of c (0 after a completed
              * character) is stored back on exit; the source position is set to the local read index on exit.
              *
              * @param source  the UTF-16 input, read from its position up to its limit
              * @param target  receives the output bytes
              * @param offsets if not null, receives the source indexes for the bytes written
              * @param flush   only forwarded to fromU() for the extension lookup
              * @return cr[0]: UNDERFLOW unless it was replaced by OVERFLOW (input left, target full), by a
              *         malformed result for an unmatched trail surrogate, or by whatever getTrailSingleBMP() or
              *         fromU() stored
3985          */
cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)3986         private CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets,
3987                 boolean flush) {
3988 
                 /* one-element holder so that getTrailSingleBMP() and fromU() can hand back a result */
3989             CoderResult[] cr = { CoderResult.UNDERFLOW };
3990 
                 /* lastSource: read index up to which offsets have already been accounted for */
3991             int sourceArrayIndex, lastSource;
3992             int targetCapacity, length;
3993             char[] table;
3994             char[] results;
3995 
3996             int c, sourceIndex;
3997             char value, minValue;
3998 
3999             /* set up the local pointers */
4000             sourceArrayIndex = source.position();
4001             targetCapacity = target.remaining();
4002             table = sharedData.mbcs.fromUnicodeTable;
4003 
                 /* the LF/NL-swapped result table replaces the regular one when that option is set */
4004             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
4005                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
4006             } else {
4007                 results = sharedData.mbcs.fromUnicodeChars;
4008             }
4009 
                 /* a looked-up result below minValue is treated as "not assigned" in the loop */
4010             if (useFallback) {
4011                 /* use all roundtrip and fallback results */
4012                 minValue = 0x800;
4013             } else {
4014                 /* use only roundtrips and fallbacks from private-use characters */
4015                 minValue = 0xc00;
4016             }
4017 
4018             /* get the converter state from UConverter */
4019             c = fromUChar32;
4020 
4021             /* sourceIndex=-1 if the current character began in the previous buffer */
4022             sourceIndex = c == 0 ? 0 : -1;
4023             lastSource = sourceArrayIndex;
4024 
4025             /*
4026              * since the conversion here is 1:1 UChar:uint8_t, we need only one counter for the minimum of the
4027              * sourceLength and targetCapacity
4028              */
4029             length = source.limit() - sourceArrayIndex;
4030             if (length < targetCapacity) {
4031                 targetCapacity = length;
4032             }
4033 
                 /*
                  * A code point is pending from the previous call: let getTrailSingleBMP() deal with it before any
                  * new input is read. The helper reports back through x (c, sourceArrayIndex) and cr; a false
                  * return value means that the conversion loop must not run.
                  * NOTE(review): presumably the pending value is a lead surrogate that waits for its trail unit --
                  * confirm in getTrailSingleBMP().
                  */
4034             boolean doloop = true;
4035             if (c != 0 && targetCapacity > 0) {
4036                 SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
4037                 doloop = getTrailSingleBMP(source, x, cr);
4038                 c = x.c;
4039                 sourceArrayIndex = x.sourceArrayIndex;
4040             }
4041 
4042             if (doloop) {
4043                 while (targetCapacity > 0) {
4044                     /*
4045                      * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate pair
4046                      * for a "supplementary code point".
4047                      */
4048                     c = source.get(sourceArrayIndex++);
4049                     /*
4050                      * Do not immediately check for single surrogates: Assume that they are unassigned and check for
4051                      * them in that case. This speeds up the conversion of assigned characters.
4052                      */
4053                     /* convert the Unicode code point in c into codepage bytes */
4054                     value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
4055 
4056                     /* is this code point assigned, or do we use fallbacks? */
4057                     if (value >= minValue) {
4058                         /* assigned, write the output character bytes from value and length */
4059                         /* length==1 */
4060                         /* this is easy because we know that there is enough space */
                             /* only the low byte of value is output; no offset is written here, see lastSource */
4061                         target.put((byte) value);
4062                         --targetCapacity;
4063 
4064                         /* normal end of conversion: prepare for a new character */
4065                         c = 0;
4066                         continue;
4067                     } else if (!UTF16.isSurrogate((char) c)) {
4068                         /* normal, unassigned BMP character */
4069                     } else if (UTF16.isLeadSurrogate((char) c)) {
4070                         // getTrail:
                             /* same hand-over to getTrailSingleBMP() as before the loop; stop if it says so */
4071                         SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
4072                         doloop = getTrailSingleBMP(source, x, cr);
4073                         c = x.c;
4074                         sourceArrayIndex = x.sourceArrayIndex;
4075                         if (!doloop)
4076                             break;
4077                     } else {
4078                         /* this is an unmatched trail code unit (2nd surrogate) */
4079                         /* callback(illegal) */
4080                         cr[0] = CoderResult.malformedForLength(1);
4081                         break;
4082                     }
4083 
4084                     /* c does not have a mapping */
4085 
4086                     /* get the number of code units for c to correctly advance sourceIndex */
4087                     length = UTF16.getCharCount(c);
4088 
4089                     /* set offsets since the start or the last extension */
4090                     if (offsets != null) {
4091                         int count = sourceArrayIndex - lastSource;
4092 
4093                         /* do not set the offset for this character */
4094                         count -= length;
4095 
4096                         while (count > 0) {
4097                             offsets.put(sourceIndex++);
4098                             --count;
4099                         }
4100                         /* offsets and sourceIndex are now set for the current character */
4101                     }
4102 
4103                     /* try an extension mapping */
                         /*
                          * fromU() gets the buffer itself, so the buffer position is synchronized with the local read
                          * index before the call and the local index is re-read from it afterwards.
                          */
4104                     lastSource = sourceArrayIndex;
4105                     source.position(sourceArrayIndex);
4106                     c = fromU(c, source, target, offsets, sourceIndex, length, flush, cr);
4107                     sourceArrayIndex = source.position();
                         /* skip this character plus the code units by which fromU() advanced the position */
4108                     sourceIndex += length + (sourceArrayIndex - lastSource);
4109                     lastSource = sourceArrayIndex;
4110 
4111                     if (cr[0].isError()) {
4112                         /* not mappable or buffer overflow */
4113                         break;
4114                     } else {
4115                         /* a mapping was written to the target, continue */
                             /*
                              * NOTE(review): CoderResult.OVERFLOW is not isError(), so an overflow reported by fromU()
                              * also takes this branch; the recomputed targetCapacity is then expected to be 0, which
                              * ends the loop -- confirm.
                              */
4116 
4117                         /* recalculate the targetCapacity after an extension mapping */
4118                         targetCapacity = target.remaining();
4119                         length = source.limit() - sourceArrayIndex;
4120                         if (length < targetCapacity) {
4121                             targetCapacity = length;
4122                         }
4123                     }
4124                 }
4125             }
4126 
                 /*
                  * NOTE(review): this assignment does not look at the current cr[0]; a result stored earlier would
                  * be replaced if input remained and the target were full at the same time -- confirm that this
                  * combination cannot occur.
                  */
4127             if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
4128                 /* target is full */
4129                 cr[0] = CoderResult.OVERFLOW;
4130             }
4131 
4132             /* set offsets since the start or the last callback */
4133             if (offsets != null) {
4134                 int count = sourceArrayIndex - lastSource;
4135                 while (count > 0) {
4136                     offsets.put(sourceIndex++);
4137                     --count;
4138                 }
4139             }
4140 
4141             /* set the converter state back into UConverter */
4142             fromUChar32 = c;
4143 
4144             /* write back the updated pointers */
4145             source.position(sourceArrayIndex);
4146 
4147             return cr[0];
4148         }
4149 
4150         /*
              * This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages.
              *
              * Each loop iteration first checks that the target has room, then obtains a code point (by reading a new
              * code unit, or by reusing the pending one when doread is false), looks it up with
              * MBCS_SINGLE_RESULT_FROM_U() and either writes one byte together with its offset or hands the code
              * point to unassignedDouble().
              *
              * Converter state: fromUChar32 is read on entry and the current value of c (0 after a written
              * character) is stored back on exit; the source position is set to the local read index on exit.
              *
              * @param source  the UTF-16 input, read from its position up to its limit
              * @param target  receives the output bytes
              * @param offsets if not null, receives the value of sourceIndex for each byte written here
              * @param flush   forwarded to getTrailDouble() and unassignedDouble()
              * @return cr[0]: UNDERFLOW unless it was replaced by OVERFLOW (target full), by a malformed result for
              *         an unmatched trail surrogate, or by whatever getTrailDouble() or unassignedDouble() stored
              */
cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)4151         private CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
4152                 IntBuffer offsets, boolean flush) {
4153 
                 /* one-element holder so that getTrailDouble() and unassignedDouble() can hand back a result */
4154             CoderResult[] cr = { CoderResult.UNDERFLOW };
4155 
4156             int sourceArrayIndex;
4157 
4158             char[] table;
4159             char[] results;
4160 
4161             int c;
                 /*
                  * sourceIndex is the offset recorded for the character that is being converted; nextSourceIndex is
                  * incremented for every code unit read here and becomes sourceIndex once a character was written.
                  */
4162             int sourceIndex, nextSourceIndex;
4163 
4164             char value, minValue;
4165 
4166             /* set up the local pointers */
4167             short uniMask;
4168             sourceArrayIndex = source.position();
4169 
4170             table = sharedData.mbcs.fromUnicodeTable;
4171 
                 /* the LF/NL-swapped result table replaces the regular one when that option is set */
4172             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
4173                 results = sharedData.mbcs.swapLFNLFromUnicodeChars;
4174             } else {
4175                 results = sharedData.mbcs.fromUnicodeChars;
4176             }
4177 
                 /* a looked-up result below minValue is treated as unassigned in the loop */
4178             if (useFallback) {
4179                 /* use all roundtrip and fallback results */
4180                 minValue = 0x800;
4181             } else {
4182                 /* use only roundtrips and fallbacks from private-use characters */
4183                 minValue = 0xc00;
4184             }
4185             // agljport:comment hasSupplementary only used in getTrail block which now simply repeats the mask operation
                 /* uniMask is not examined in this method; it is only passed on to getTrailDouble() */
4186             uniMask = sharedData.mbcs.unicodeMask;
4187 
4188             /* get the converter state from UConverter */
4189             c = fromUChar32;
4190 
4191             /* sourceIndex=-1 if the current character began in the previous buffer */
4192             sourceIndex = c == 0 ? 0 : -1;
4193             nextSourceIndex = 0;
4194 
                 /*
                  * doloop == false: do not enter (or leave) the conversion loop.
                  * doread == false: the next iteration converts the code point already in c instead of reading one.
                  *
                  * A pending lead surrogate goes to getTrailDouble(), which reports back through x and cr; any other
                  * pending code point is converted as it is in the first iteration.
                  */
4195             boolean doloop = true;
4196             boolean doread = true;
4197             if (c != 0 && target.hasRemaining()) {
4198                 if (UTF16.isLeadSurrogate((char) c)) {
4199                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
4200                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
4201                     doread = x.doread;
4202                     c = x.c;
4203                     sourceArrayIndex = x.sourceArrayIndex;
4204                     sourceIndex = x.sourceIndex;
4205                     nextSourceIndex = x.nextSourceIndex;
4206                 } else {
4207                     doread = false;
4208                 }
4209             }
4210 
4211             if (doloop) {
                     /* run while there is input left, or while a pending code point still has to be converted */
4212                 while (!doread || sourceArrayIndex < source.limit()) {
4213                     /*
4214                      * This following test is to see if available input would overflow the output. It does not catch
4215                      * output of more than one byte that overflows as a result of a multi-byte character or callback
4216                      * output from the last source character. Therefore, those situations also test for overflows and
4217                      * will then break the loop, too.
4218                      */
4219                     if (target.hasRemaining()) {
4220                         /*
4221                          * Get a correct Unicode code point: a single UChar for a BMP code point or a matched surrogate
4222                          * pair for a "supplementary code point".
4223                          */
4224 
4225                         if (doread) {
4226                             c = source.get(sourceArrayIndex++);
4227                             ++nextSourceIndex;
4228                             if (UTF16.isSurrogate((char) c)) {
4229                                 if (UTF16.isLeadSurrogate((char) c)) {
4230                                     // getTrail:
4231                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
4232                                             nextSourceIndex);
4233                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
4234                                     c = x.c;
4235                                     sourceArrayIndex = x.sourceArrayIndex;
4236                                     sourceIndex = x.sourceIndex;
4237                                     nextSourceIndex = x.nextSourceIndex;
                                         /*
                                          * x.doread == true: c is not converted in this iteration; go on or stop
                                          * as doloop says. Otherwise fall through and convert the code point in c.
                                          * NOTE(review): doloop is not checked on the fall-through path -- confirm
                                          * that getTrailDouble() never returns false with x.doread == false.
                                          */
4238                                     if (x.doread) {
4239                                         if (doloop)
4240                                             continue;
4241                                         else
4242                                             break;
4243                                     }
4244                                 } else {
4245                                     /* this is an unmatched trail code unit (2nd surrogate) */
4246                                     /* callback(illegal) */
4247                                     cr[0] = CoderResult.malformedForLength(1);
4248                                     break;
4249                                 }
4250                             }
4251                         } else {
                                 /* the pending code point is used up by this iteration; read again afterwards */
4252                             doread = true;
4253                         }
4254 
4255                         /* convert the Unicode code point in c into codepage bytes */
4256                         value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
4257 
4258                         /* is this code point assigned, or do we use fallbacks? */
4259                         if (value >= minValue) {
4260                             /* assigned, write the output character bytes from value and length */
4261                             /* length==1 */
4262                             /* this is easy because we know that there is enough space */
                                 /* only the low byte of value is output */
4263                             target.put((byte) value);
4264                             if (offsets != null) {
4265                                 offsets.put(sourceIndex);
4266                             }
4267 
4268                             /* normal end of conversion: prepare for a new character */
4269                             c = 0;
4270                             sourceIndex = nextSourceIndex;
4271                         } else { /* unassigned */
4272                             /* try an extension mapping */
                                 /* unassignedDouble() reports back through x and cr; false ends the loop */
4273                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
4274                                     nextSourceIndex);
4275                             doloop = unassignedDouble(source, target, x, flush, cr);
4276                             c = x.c;
4277                             sourceArrayIndex = x.sourceArrayIndex;
4278                             sourceIndex = x.sourceIndex;
4279                             nextSourceIndex = x.nextSourceIndex;
4280                             if (!doloop)
4281                                 break;
4282                         }
4283                     } else {
4284                         /* target is full */
4285                         cr[0] = CoderResult.OVERFLOW;
4286                         break;
4287                     }
4288                 }
4289             }
4290 
4291             /* set the converter state back into UConverter */
4292             fromUChar32 = c;
4293 
4294             /* write back the updated pointers */
4295             source.position(sourceArrayIndex);
4296 
4297             return cr[0];
4298         }
4299 
4300         /* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)4301         private CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target,
4302                 IntBuffer offsets, boolean flush) {
4303             CoderResult[] cr = { CoderResult.UNDERFLOW };
4304 
4305             int sourceArrayIndex;
4306 
4307             char[] table;
4308             char[] chars;
4309 
4310             int c, sourceIndex, nextSourceIndex;
4311 
4312             int stage2Entry;
4313             int value;
4314             int length;
4315             short uniMask;
4316 
4317             /* use optimized function if possible */
4318             uniMask = sharedData.mbcs.unicodeMask;
4319 
4320             /* set up the local pointers */
4321             sourceArrayIndex = source.position();
4322 
4323             table = sharedData.mbcs.fromUnicodeTable;
4324             int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
4325 
4326             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
4327                 chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
4328             } else {
4329                 chars = sharedData.mbcs.fromUnicodeChars;
4330             }
4331 
4332             /* get the converter state from UConverter */
4333             c = fromUChar32;
4334 
4335             /* sourceIndex=-1 if the current character began in the previous buffer */
4336             sourceIndex = c == 0 ? 0 : -1;
4337             nextSourceIndex = 0;
4338 
4339             /* conversion loop */
4340             boolean doloop = true;
4341             boolean doread = true;
4342             if (c != 0 && target.hasRemaining()) {
4343                 if (UTF16.isLeadSurrogate((char) c)) {
4344                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
4345                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
4346                     doread = x.doread;
4347                     c = x.c;
4348                     sourceArrayIndex = x.sourceArrayIndex;
4349                     sourceIndex = x.sourceIndex;
4350                     nextSourceIndex = x.nextSourceIndex;
4351                 } else {
4352                     doread = false;
4353                 }
4354             }
4355 
4356             if (doloop) {
4357                 while (!doread || sourceArrayIndex < source.limit()) {
4358                     /*
4359                      * This following test is to see if available input would overflow the output. It does not catch
4360                      * output of more than one byte that overflows as a result of a multi-byte character or callback
4361                      * output from the last source character. Therefore, those situations also test for overflows and
4362                      * will then break the loop, too.
4363                      */
4364                     if (target.hasRemaining()) {
4365                         if (doread) {
4366                             /*
4367                              * Get a correct Unicode code point: a single UChar for a BMP code point or a matched
4368                              * surrogate pair for a "supplementary code point".
4369                              */
4370                             c = source.get(sourceArrayIndex++);
4371                             ++nextSourceIndex;
4372                             /*
4373                              * This also tests if the codepage maps single surrogates. If it does, then surrogates are
4374                              * not paired but mapped separately. Note that in this case unmatched surrogates are not
4375                              * detected.
4376                              */
4377                             if (UTF16.isSurrogate((char) c) && (uniMask & UConverterConstants.HAS_SURROGATES) == 0) {
4378                                 if (UTF16.isLeadSurrogate((char) c)) {
4379                                     // getTrail:
4380                                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
4381                                             nextSourceIndex);
4382                                     doloop = getTrailDouble(source, target, uniMask, x, flush, cr);
4383                                     c = x.c;
4384                                     sourceArrayIndex = x.sourceArrayIndex;
4385                                     sourceIndex = x.sourceIndex;
4386                                     nextSourceIndex = x.nextSourceIndex;
4387 
4388                                     if (x.doread) {
4389                                         if (doloop)
4390                                             continue;
4391                                         else
4392                                             break;
4393                                     }
4394                                 } else {
4395                                     /* this is an unmatched trail code unit (2nd surrogate) */
4396                                     /* callback(illegal) */
4397                                     cr[0] = CoderResult.malformedForLength(1);
4398                                     break;
4399                                 }
4400                             }
4401                         } else {
4402                             doread = true;
4403                         }
4404 
4405                         /* convert the Unicode code point in c into codepage bytes */
4406                         stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
4407 
4408                         /* get the bytes and the length for the output */
4409                         /* MBCS_OUTPUT_2 */
4410                         value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
4411                         if (value <= 0xff) {
4412                             length = 1;
4413                         } else {
4414                             length = 2;
4415                         }
4416 
4417                         /* is this code point assigned, or do we use fallbacks? */
4418                         if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value != 0))) {
4419                             /*
4420                              * We allow a 0 byte output if the "assigned" bit is set for this entry. There is no way
4421                              * with this data structure for fallback output to be a zero byte.
4422                              */
4423 
4424                             // unassigned:
4425                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex,
4426                                     nextSourceIndex);
4427 
4428                             doloop = unassignedDouble(source, target, x, flush, cr);
4429                             c = x.c;
4430                             sourceArrayIndex = x.sourceArrayIndex;
4431                             sourceIndex = x.sourceIndex;
4432                             nextSourceIndex = x.nextSourceIndex;
4433                             if (doloop)
4434                                 continue;
4435                             else
4436                                 break;
4437                         }
4438 
4439                         /* write the output character bytes from value and length */
4440                         /* from the first if in the loop we know that targetCapacity>0 */
4441                         if (length == 1) {
4442                             /* this is easy because we know that there is enough space */
4443                             target.put((byte) value);
4444                             if (offsets != null) {
4445                                 offsets.put(sourceIndex);
4446                             }
4447                         } else /* length==2 */{
4448                             target.put((byte) (value >>> 8));
4449                             if (2 <= target.remaining()) {
4450                                 target.put((byte) value);
4451                                 if (offsets != null) {
4452                                     offsets.put(sourceIndex);
4453                                     offsets.put(sourceIndex);
4454                                 }
4455                             } else {
4456                                 if (offsets != null) {
4457                                     offsets.put(sourceIndex);
4458                                 }
4459                                 errorBuffer[0] = (byte) value;
4460                                 errorBufferLength = 1;
4461 
4462                                 /* target overflow */
4463                                 cr[0] = CoderResult.OVERFLOW;
4464                                 c = 0;
4465                                 break;
4466                             }
4467                         }
4468 
4469                         /* normal end of conversion: prepare for a new character */
4470                         c = 0;
4471                         sourceIndex = nextSourceIndex;
4472                         continue;
4473                     } else {
4474                         /* target is full */
4475                         cr[0] = CoderResult.OVERFLOW;
4476                         break;
4477                     }
4478                 }
4479             }
4480 
4481             /* set the converter state back into UConverter */
4482             fromUChar32 = c;
4483 
4484             /* write back the updated pointers */
4485             source.position(sourceArrayIndex);
4486 
4487             return cr[0];
4488         }
4489 
4490         private final class SideEffectsSingleBMP {
4491             int c, sourceArrayIndex;
4492 
SideEffectsSingleBMP(int c_, int sourceArrayIndex_)4493             SideEffectsSingleBMP(int c_, int sourceArrayIndex_) {
4494                 c = c_;
4495                 sourceArrayIndex = sourceArrayIndex_;
4496             }
4497         }
4498 
4499         // function made out of block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets
4500         // assumes input c is lead surrogate
getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr)4501         private final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr) {
4502             if (x.sourceArrayIndex < source.limit()) {
4503                 /* test the following code unit */
4504                 char trail = source.get(x.sourceArrayIndex);
4505                 if (UTF16.isTrailSurrogate(trail)) {
4506                     ++x.sourceArrayIndex;
4507                     x.c = UCharacter.getCodePoint((char) x.c, trail);
4508                     /* this codepage does not map supplementary code points */
4509                     /* callback(unassigned) */
4510                     cr[0] = CoderResult.unmappableForLength(2);
4511                     return false;
4512                 } else {
4513                     /* this is an unmatched lead code unit (1st surrogate) */
4514                     /* callback(illegal) */
4515                     cr[0] = CoderResult.malformedForLength(1);
4516                     return false;
4517                 }
4518             } else {
4519                 /* no more input */
4520                 return false;
4521             }
4522             // return true;
4523         }
4524 
4525         private final class SideEffects {
4526             int c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength;
4527             boolean doread = true;
4528 
SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_, int prevLength_)4529             SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_,
4530                     int prevLength_) {
4531                 c = c_;
4532                 sourceArrayIndex = sourceArrayIndex_;
4533                 sourceIndex = sourceIndex_;
4534                 nextSourceIndex = nextSourceIndex_;
4535                 prevSourceIndex = prevSourceIndex_;
4536                 prevLength = prevLength_;
4537             }
4538         }
4539 
4540         // function made out of block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets
4541         // assumes input c is lead surrogate
getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x, boolean flush, CoderResult[] cr)4542         private final boolean getTrail(CharBuffer source, ByteBuffer target, int uniMask, SideEffects x,
4543                 boolean flush, CoderResult[] cr) {
4544             if (x.sourceArrayIndex < source.limit()) {
4545                 /* test the following code unit */
4546                 char trail = source.get(x.sourceArrayIndex);
4547                 if (UTF16.isTrailSurrogate(trail)) {
4548                     ++x.sourceArrayIndex;
4549                     ++x.nextSourceIndex;
4550                     /* convert this supplementary code point */
4551                     x.c = UCharacter.getCodePoint((char) x.c, trail);
4552                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
4553                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
4554                         fromUnicodeStatus = x.prevLength; /* save the old state */
4555                         /* callback(unassigned) */
4556                         x.doread = true;
4557                         return unassigned(source, target, null, x, flush, cr);
4558                     } else {
4559                         x.doread = false;
4560                         return true;
4561                     }
4562                 } else {
4563                     /* this is an unmatched lead code unit (1st surrogate) */
4564                     /* callback(illegal) */
4565                     cr[0] = CoderResult.malformedForLength(1);
4566                     return false;
4567                 }
4568             } else {
4569                 /* no more input */
4570                 return false;
4571             }
4572         }
4573 
4574         // function made out of block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets
unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x, boolean flush, CoderResult[] cr)4575         private final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x,
4576                 boolean flush, CoderResult[] cr) {
4577             /* try an extension mapping */
4578             int sourceBegin = x.sourceArrayIndex;
4579             source.position(x.sourceArrayIndex);
4580             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
4581             x.sourceArrayIndex = source.position();
4582             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
4583             x.prevLength = fromUnicodeStatus;
4584 
4585             if (cr[0].isError()) {
4586                 /* not mappable or buffer overflow */
4587                 return false;
4588             } else {
4589                 /* a mapping was written to the target, continue */
4590 
4591                 /* recalculate the targetCapacity after an extension mapping */
4592                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
4593                 /* normal end of conversion: prepare for a new character */
4594                 if (offsets != null) {
4595                     x.prevSourceIndex = x.sourceIndex;
4596                     x.sourceIndex = x.nextSourceIndex;
4597                 }
4598                 return true;
4599             }
4600         }
4601 
4602         private final class SideEffectsDouble {
4603             int c, sourceArrayIndex, sourceIndex, nextSourceIndex;
4604             boolean doread = true;
4605 
SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_)4606             SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) {
4607                 c = c_;
4608                 sourceArrayIndex = sourceArrayIndex_;
4609                 sourceIndex = sourceIndex_;
4610                 nextSourceIndex = nextSourceIndex_;
4611             }
4612         }
4613 
4614         // function made out of block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets
4615         // assumes input c is lead surrogate
getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask, SideEffectsDouble x, boolean flush, CoderResult[] cr)4616         private final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int uniMask,
4617                 SideEffectsDouble x, boolean flush, CoderResult[] cr) {
4618             if (x.sourceArrayIndex < source.limit()) {
4619                 /* test the following code unit */
4620                 char trail = source.get(x.sourceArrayIndex);
4621                 if (UTF16.isTrailSurrogate(trail)) {
4622                     ++x.sourceArrayIndex;
4623                     ++x.nextSourceIndex;
4624                     /* convert this supplementary code point */
4625                     x.c = UCharacter.getCodePoint((char) x.c, trail);
4626                     if ((uniMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
4627                         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
4628                         /* callback(unassigned) */
4629                         x.doread = true;
4630                         return unassignedDouble(source, target, x, flush, cr);
4631                     } else {
4632                         x.doread = false;
4633                         return true;
4634                     }
4635                 } else {
4636                     /* this is an unmatched lead code unit (1st surrogate) */
4637                     /* callback(illegal) */
4638                     cr[0] = CoderResult.malformedForLength(1);
4639                     return false;
4640                 }
4641             } else {
4642                 /* no more input */
4643                 return false;
4644             }
4645         }
4646 
4647         // function made out of block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets
unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x, boolean flush, CoderResult[] cr)4648         private final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x,
4649                 boolean flush, CoderResult[] cr) {
4650             /* try an extension mapping */
4651             int sourceBegin = x.sourceArrayIndex;
4652             source.position(x.sourceArrayIndex);
4653             x.c = fromU(x.c, source, target, null, x.sourceIndex, x.nextSourceIndex, flush, cr);
4654             x.sourceArrayIndex = source.position();
4655             x.nextSourceIndex += x.sourceArrayIndex - sourceBegin;
4656 
4657             if (cr[0].isError()) {
4658                 /* not mappable or buffer overflow */
4659                 return false;
4660             } else {
4661                 /* a mapping was written to the target, continue */
4662 
4663                 /* recalculate the targetCapacity after an extension mapping */
4664                 // x.targetCapacity=pArgs.targetLimit-x.targetArrayIndex;
4665                 /* normal end of conversion: prepare for a new character */
4666                 x.sourceIndex = x.nextSourceIndex;
4667                 return true;
4668             }
4669         }
4670 
4671         /**
4672          * Overrides super class method
4673          *
4674          * @param encoder
4675          * @param source
4676          * @param target
4677          * @param offsets
4678          * @return
4679          */
4680         @Override
cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target, IntBuffer offsets)4681         protected CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, ByteBuffer target,
4682                 IntBuffer offsets) {
4683             CharsetMBCS cs = (CharsetMBCS) encoder.charset();
4684             byte[] subchar;
4685             int length;
4686 
4687             if (cs.subChar1 != 0
4688                     && (cs.sharedData.mbcs.extIndexes != null ? encoder.useSubChar1
4689                             : (encoder.invalidUCharBuffer[0] <= 0xff))) {
4690                 /*
4691                  * select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS
4692                  * behavior)
4693                  */
4694                 subchar = new byte[] { cs.subChar1 };
4695                 length = 1;
4696             } else {
4697                 /* select subChar in all other cases */
4698                 subchar = cs.subChar;
4699                 length = cs.subCharLen;
4700             }
4701 
4702             /* reset the selector for the next code point */
4703             encoder.useSubChar1 = false;
4704 
4705             if (cs.sharedData.mbcs.outputType == MBCS_OUTPUT_2_SISO) {
4706                 byte[] buffer = new byte[4];
4707                 int i = 0;
4708 
4709                 /* fromUnicodeStatus contains prevLength */
4710                 switch (length) {
4711                 case 1:
4712                     if (encoder.fromUnicodeStatus == 2) {
4713                         /* DBCS mode and SBCS sub char: change to SBCS */
4714                         encoder.fromUnicodeStatus = 1;
4715                         buffer[i++] = UConverterConstants.SI;
4716                     }
4717                     buffer[i++] = subchar[0];
4718                     break;
4719                 case 2:
4720                     if (encoder.fromUnicodeStatus <= 1) {
4721                         /* SBCS mode and DBCS sub char: change to DBCS */
4722                         encoder.fromUnicodeStatus = 2;
4723                         buffer[i++] = UConverterConstants.SO;
4724                     }
4725                     buffer[i++] = subchar[0];
4726                     buffer[i++] = subchar[1];
4727                     break;
4728                 default:
4729                     throw new IllegalArgumentException();
4730                 }
4731 
4732                 subchar = buffer;
4733                 length = i;
4734             }
4735             return CharsetEncoderICU.fromUWriteBytes(encoder, subchar, 0, length, target, offsets, source.position());
4736         }
4737 
4738         /**
4739          * Gets called whenever CharsetEncoder.replaceWith gets called. allowReplacementChanges only allows subChar and
4740          * subChar1 to be modified outside construction (since replaceWith is called once during construction).
4741          *
4742          * @param replacement
4743          *            The replacement for subchar.
4744          */
4745         @Override
implReplaceWith(byte[] replacement)4746         protected void implReplaceWith(byte[] replacement) {
4747             if (allowReplacementChanges) {
4748                 CharsetMBCS cs = (CharsetMBCS) this.charset();
4749 
4750                 System.arraycopy(replacement, 0, cs.subChar, 0, replacement.length);
4751                 cs.subCharLen = (byte) replacement.length;
4752                 cs.subChar1 = 0;
4753             }
4754         }
4755     }
4756 
4757     @Override
newDecoder()4758     public CharsetDecoder newDecoder() {
4759         return new CharsetDecoderMBCS(this);
4760     }
4761 
4762     @Override
newEncoder()4763     public CharsetEncoder newEncoder() {
4764         return new CharsetEncoderMBCS(this);
4765     }
4766 
4767     @SuppressWarnings("fallthrough")
MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter)4768     void MBCSGetFilteredUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which, int filter){
4769         UConverterMBCSTable mbcsTable;
4770         char[] table;
4771         char st1,maxStage1, st2;
4772         int st3;
4773         int c ;
4774 
4775         mbcsTable = data.mbcs;
4776         table = mbcsTable.fromUnicodeTable;
4777         if(mbcsTable.hasSupplementary()){
4778             maxStage1 = 0x440;
4779         }
4780         else{
4781             maxStage1 = 0x40;
4782         }
4783         c=0; /* keep track of current code point while enumerating */
4784 
4785         if(mbcsTable.outputType==MBCS_OUTPUT_1){
4786             char stage2, stage3;
4787             char minValue;
4788             char[] results = mbcsTable.fromUnicodeChars;
4789 
4790             if(which==ROUNDTRIP_SET) {
4791                 /* use only roundtrips */
4792                 minValue=0xf00;
4793             } else {
4794                 /* use all roundtrip and fallback results */
4795                 minValue=0x800;
4796             }
4797             for(st1=0;st1<maxStage1;++st1){
4798                 st2 = table[st1];
4799                 if(st2>maxStage1){
4800                     stage2 = st2;
4801                     for(st2=0; st2<64; ++st2){
4802                         st3 = table[stage2 + st2];
4803                         if(st3!=0){
4804                             /*read the stage 3 block */
4805                             stage3 = (char)st3;
4806                             do {
4807                                 if(results[stage3++]>=minValue){
4808                                      setFillIn.add(c);
4809                                 }
4810                             }while((++c&0xf) !=0);
4811                           } else {
4812                             c+= 16; /*empty stage 2 block */
4813                         }
4814                     }
4815                 } else {
4816                     c+=1024; /* empty stage 2 block */
4817                 }
4818             }
4819         } else {
4820             int[] tableInts = mbcsTable.fromUnicodeTableInts;
4821             int stage2,stage3;
4822             byte[] bytes;
4823             int st3Multiplier;
4824             int value;
4825             boolean useFallBack;
4826             bytes = mbcsTable.fromUnicodeBytes;
4827             char[] chars = mbcsTable.fromUnicodeChars;
4828             int[] ints = mbcsTable.fromUnicodeInts;
4829             useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
4830             switch(mbcsTable.outputType) {
4831             case MBCS_OUTPUT_3:
4832             case MBCS_OUTPUT_4_EUC:
4833                 st3Multiplier = 3;
4834                 break;
4835             case MBCS_OUTPUT_4:
4836                 st3Multiplier =4;
4837                 break;
4838             default:
4839                 st3Multiplier =2;
4840                 break;
4841             }
4842 
4843             for(st1=0;st1<maxStage1;++st1){
4844                 st2 = table[st1];
4845                 if(st2>(maxStage1>>1)){
4846                     stage2 =  st2 ;
4847                     for(st2=0;st2<64;++st2){
4848                         /*read the stage 3 block */
4849                         st3 = tableInts[stage2 + st2];
4850                         if(st3!=0){
4851                         //if((st3=table[stage2+st2])!=0){
4852                             stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
4853 
4854                             /* get the roundtrip flags for the stage 3 block */
4855                             st3>>>=16;
4856                             switch(filter) {
4857                             case UCNV_SET_FILTER_NONE:
4858                                 do {
4859                                    if((st3&1)!=0){
4860                                         setFillIn.add(c);
4861                                    }else if (useFallBack) {
4862                                         int b =0;
4863                                         switch(st3Multiplier) {
4864                                         case 4:
4865                                             b = ints[stage3 / 4];
4866                                             break;
4867                                         case 3:
4868                                             b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2];
4869                                             break;
4870                                         case 2:
4871                                             b = chars[stage3 / 2];
4872                                             break;
4873                                         default:
4874                                             break;
4875                                         }
4876                                         stage3+=st3Multiplier;
4877                                         if(b!=0) {
4878                                             setFillIn.add(c);
4879                                         }
4880                                     }
4881                                     st3>>=1;
4882                                 }while((++c&0xf)!=0);
4883                                 break;
4884                             case UCNV_SET_FILTER_DBCS_ONLY:
4885                                 /* Ignore single bytes results (<0x100). */
4886                                 do {
4887                                     if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){
4888                                         setFillIn.add(c);
4889                                     }
4890                                     st3>>=1;
4891                                     stage3+=2;
4892                                 }while((++c&0xf) != 0);
4893                                break;
4894                             case UCNV_SET_FILTER_2022_CN :
4895                                 /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2202-CN. */
4896                                 do {
4897                                     if(((st3&1) != 0 || useFallBack) &&
4898                                             ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){
4899                                         setFillIn.add(c);
4900                                     }
4901                                     st3>>=1;
4902                                     stage3+=3;
4903                                 }while((++c&0xf)!=0);
4904                                 break;
4905                             case UCNV_SET_FILTER_SJIS:
4906                                 /* only add code points that map tp Shift-JIS codes corrosponding to JIS X 0280. */
4907                                 do{
4908                                     if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){
4909                                         setFillIn.add(c);
4910                                     }
4911                                     st3>>=1;
4912                                     stage3+=2;
4913                                 }while((++c&0xf)!=0);
4914                                 break;
4915                             case UCNV_SET_FILTER_GR94DBCS:
4916                                 /* only add code points that maps to ISO 2022 GR 94 DBCS codes*/
4917                                 do {
4918                                     if(((st3&1) != 0 || useFallBack) &&
4919                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) &&
4920                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
4921                                         setFillIn.add(c);
4922                                     }
4923                                     st3>>=1;
4924                                     stage3+=2;
4925                                 }while((++c&0xf)!=0);
4926                                 break;
4927                             case UCNV_SET_FILTER_HZ:
4928                                 /*Only add code points that are suitable for HZ DBCS*/
4929                                 do {
4930                                     if( ((st3&1) != 0 || useFallBack) &&
4931                                             (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) &&
4932                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
4933                                         setFillIn.add(c);
4934                                     }
4935                                     st3>>=1;
4936                                     stage3+=2;
4937                                 }while((++c&0xf) != 0);
4938                                 break;
4939                             default:
4940                                 return;
4941                             }
4942                         } else {
4943                             c+=16; /* empty stage 3 block */
4944                         }
4945                     }
4946                 } else {
4947                     c+=1024; /*empty stage2 block */
4948                 }
4949             }
4950         }
4951         extGetUnicodeSet(setFillIn, which, filter, data);
4952     }
4953 
extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback, int minLength, int c, char s[],int length,int sectionIndex)4954     static void extGetUnicodeSetString(ByteBuffer cx,UnicodeSet setFillIn, boolean useFallback,
4955         int minLength, int c, char s[],int length,int sectionIndex){
4956         CharBuffer fromUSectionUChar;
4957         IntBuffer fromUSectionValues;
4958         fromUSectionUChar = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX,char.class );
4959         fromUSectionValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX,int.class );
4960         int fromUSectionUCharIndex = fromUSectionUChar.position()+sectionIndex;
4961         int fromUSectionValuesIndex = fromUSectionValues.position()+sectionIndex;
4962         int value, i, count;
4963 
4964         /* read first pair of the section */
4965        count = fromUSectionUChar.get(fromUSectionUCharIndex++);
4966        value = fromUSectionValues.get(fromUSectionValuesIndex++);
4967        if(value!=0 && (FROM_U_IS_ROUNDTRIP(value) || useFallback) && FROM_U_GET_LENGTH(value)>=minLength) {
4968            if(c>=0){
4969                setFillIn.add(c);
4970            } else {
4971                StringBuilder normalizedStringBuilder = new StringBuilder();
4972                for(int j=0; j<length;j++){
4973                    normalizedStringBuilder.append(s[j]);
4974                }
4975                String normalizedString = normalizedStringBuilder.toString();
4976                for(int j=0;j<length;j++){
4977                    setFillIn.add(normalizedString);
4978                }
4979              }
4980        }
4981 
4982        for(i=0; i<count; ++i){
4983            s[length] = fromUSectionUChar.get(fromUSectionUCharIndex + i);
4984            value = fromUSectionValues.get(fromUSectionValuesIndex + i);
4985 
4986            if(value==0) {
4987                /* no mapping, do nothing */
4988            } else if (FROM_U_IS_PARTIAL(value)) {
4989                extGetUnicodeSetString( cx, setFillIn, useFallback, minLength, UConverterConstants.U_SENTINEL, s, length+1,
4990                        FROM_U_GET_PARTIAL_INDEX(value));
4991            } else if ((useFallback ? (value&FROM_U_RESERVED_MASK)==0:((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))==FROM_U_ROUNDTRIP_FLAG))
4992                    && FROM_U_GET_LENGTH(value)>=minLength) {
4993                StringBuilder normalizedStringBuilder = new StringBuilder(); // String for composite characters
4994                for(int j=0; j<(length+1);j++){
4995                    normalizedStringBuilder.append(s[j]);
4996                }
4997              setFillIn.add(normalizedStringBuilder.toString());
4998            }
4999        }
5000 
5001     }
5002 
5003 
extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data)5004     static void extGetUnicodeSet(UnicodeSet setFillIn, int which, int filter, UConverterSharedData Data){
5005         int st1, stage1Length, st2, st3, minLength;
5006         int ps2, ps3;
5007 
5008         CharBuffer stage12, stage3;
5009         int value, length;
5010         IntBuffer stage3b;
5011         boolean useFallback;
5012         char s[] = new char[MAX_UCHARS];
5013         int c;
5014         ByteBuffer cx = Data.mbcs.extIndexes;
5015         if(cx == null){
5016             return;
5017         }
5018         stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX,char.class );
5019         stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX,char.class );
5020         stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX,int.class );
5021 
5022         stage1Length = cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH);
5023         useFallback = (which==ROUNDTRIP_AND_FALLBACK_SET);
5024 
5025         c = 0;
5026         if(filter == UCNV_SET_FILTER_2022_CN) {
5027             minLength = 3;
5028         } else if (Data.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY || filter != UCNV_SET_FILTER_NONE) {
5029             /* DBCS-only, ignore single-byte results */
5030             minLength = 2;
5031         } else {
5032             minLength = 1;
5033         }
5034 
5035         for(st1=0; st1< stage1Length; ++st1){
5036             st2 = stage12.get(st1);
5037             if(st2>stage1Length) {
5038                 ps2 = st2;
5039                 for(st2=0;st2<64;++st2){
5040                     st3=(stage12.get(ps2+st2))<<STAGE_2_LEFT_SHIFT;
5041                     if(st3!= 0){
5042                         ps3 = st3;
5043                         do {
5044                             value = stage3b.get(stage3.get(ps3++));
5045                             if(value==0){
5046                                 /* no mapping do nothing */
5047                             }else if (FROM_U_IS_PARTIAL(value)){
5048                                 length = 0;
5049                                 length=UTF16.append(s, length, c);
5050                                 extGetUnicodeSetString(cx,setFillIn,useFallback,minLength,c,s,length,FROM_U_GET_PARTIAL_INDEX(value));
5051                             } else if ((useFallback ?  (value&FROM_U_RESERVED_MASK)==0 :((value&(FROM_U_ROUNDTRIP_FLAG|FROM_U_RESERVED_MASK))== FROM_U_ROUNDTRIP_FLAG)) &&
5052                                     FROM_U_GET_LENGTH(value)>=minLength){
5053 
5054                                 switch(filter) {
5055                                 case UCNV_SET_FILTER_2022_CN:
5056                                     if(!(FROM_U_GET_LENGTH(value)==3 && FROM_U_GET_DATA(value)<=0x82ffff)){
5057                                         continue;
5058                                     }
5059                                     break;
5060                                 case UCNV_SET_FILTER_SJIS:
5061                                     if(!(FROM_U_GET_LENGTH(value)==2 && (value=FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)){
5062                                         continue;
5063                                     }
5064                                     break;
5065                                 case UCNV_SET_FILTER_GR94DBCS:
5066                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1)
5067                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
5068                                         continue;
5069                                     }
5070                                     break;
5071                                 case UCNV_SET_FILTER_HZ:
5072                                     if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfdfe - 0xa1a1)
5073                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
5074                                         continue;
5075                                     }
5076                                     break;
5077                                 default:
5078                                     /*
5079                                      * UCNV_SET_FILTER_NONE,
5080                                      * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
5081                                      */
5082                                     break;
5083                                 }
5084                                 setFillIn.add(c);
5085 
5086                             }
5087                         }while((++c&0xf) != 0);
5088 
5089                     } else {
5090                         c+=16;   /* emplty stage3 block */
5091                     }
5092                 }
5093             } else {
5094                 c+=1024;  /* empty stage 2 block*/
5095             }
5096         }
5097     }
5098 
MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which)5099     void MBCSGetUnicodeSetForUnicode(UConverterSharedData data, UnicodeSet setFillIn, int which){
5100         MBCSGetFilteredUnicodeSetForUnicode(data, setFillIn, which,
5101                 this.sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? UCNV_SET_FILTER_DBCS_ONLY : UCNV_SET_FILTER_NONE );
5102     }
5103 
5104     @Override
getUnicodeSetImpl( UnicodeSet setFillIn, int which)5105     void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
5106         if((options & MBCS_OPTION_GB18030)!=0){
5107             setFillIn.add(0, 0xd7ff);
5108             setFillIn.add(0xe000, 0x10ffff);
5109         }
5110         else {
5111             this.MBCSGetUnicodeSetForUnicode(sharedData, setFillIn, which);
5112         }
5113     }
5114 
5115 }
5116