• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 2005-2016, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10 package ohos.global.icu.text;
11 
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.Reader;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.Collections;
18 import java.util.List;
19 
20 
21 /**
22  * <code>CharsetDetector</code> provides a facility for detecting the
23  * charset or encoding of character data in an unknown format.
24  * The input data can either be from an input stream or an array of bytes.
25  * The result of the detection operation is a list of possibly matching
26  * charsets, or, for simple use, you can just ask for a Java Reader that
 * will work over the input data.
28  * <p>
29  * Character set detection is at best an imprecise operation.  The detection
30  * process will attempt to identify the charset that best matches the characteristics
31  * of the byte data, but the process is partly statistical in nature, and
32  * the results can not be guaranteed to always be correct.
33  * <p>
34  * For best accuracy in charset detection, the input data should be primarily
35  * in a single language, and a minimum of a few hundred bytes worth of plain text
36  * in the language are needed.  The detection process will attempt to
37  * ignore html or xml style markup that could otherwise obscure the content.
38  * <p>
39  * @hide exposed on OHOS
40  */
41 public class CharsetDetector {
42 
43 //   Question: Should we have getters corresponding to the setters for input text
44 //   and declared encoding?
45 
46 //   A thought: If we were to create our own type of Java Reader, we could defer
47 //   figuring out an actual charset for data that starts out with too much English
48 //   only ASCII until the user actually read through to something that didn't look
49 //   like 7 bit English.  If  nothing else ever appeared, we would never need to
50 //   actually choose the "real" charset.  All assuming that the application just
51 //   wants the data, and doesn't care about a char set name.
52 
53     /**
54      *   Constructor
55      */
CharsetDetector()56     public CharsetDetector() {
57     }
58 
59     /**
60      * Set the declared encoding for charset detection.
61      *  The declared encoding of an input text is an encoding obtained
62      *  from an http header or xml declaration or similar source that
63      *  can be provided as additional information to the charset detector.
64      *  A match between a declared encoding and a possible detected encoding
65      *  will raise the quality of that detected encoding by a small delta,
66      *  and will also appear as a "reason" for the match.
67      * <p>
68      * A declared encoding that is incompatible with the input data being
69      * analyzed will not be added to the list of possible encodings.
70      *
71      *  @param encoding The declared encoding
72      */
setDeclaredEncoding(String encoding)73     public CharsetDetector setDeclaredEncoding(String encoding) {
74         fDeclaredEncoding = encoding;
75         return this;
76     }
77 
78     /**
79      * Set the input text (byte) data whose charset is to be detected.
80      *
81      * @param in the input text of unknown encoding
82      *
83      * @return This CharsetDetector
84      */
setText(byte [] in)85     public CharsetDetector setText(byte [] in) {
86         fRawInput  = in;
87         fRawLength = in.length;
88 
89         return this;
90     }
91 
92     private static final int kBufSize = 8000;
93 
94     /**
95      * Set the input text (byte) data whose charset is to be detected.
96      *  <p>
97      *   The input stream that supplies the character data must have markSupported()
98      *   == true; the charset detection process will read a small amount of data,
99      *   then return the stream to its original position via
100      *   the InputStream.reset() operation.  The exact amount that will
101      *   be read depends on the characteristics of the data itself.
102      *
103      * @param in the input text of unknown encoding
104      *
105      * @return This CharsetDetector
106      */
107 
setText(InputStream in)108     public CharsetDetector setText(InputStream in) throws IOException {
109         fInputStream = in;
110         fInputStream.mark(kBufSize);
111         fRawInput = new byte[kBufSize];   // Always make a new buffer because the
112                                           //   previous one may have come from the caller,
113                                           //   in which case we can't touch it.
114         fRawLength = 0;
115         int remainingLength = kBufSize;
116         while (remainingLength > 0 ) {
117             // read() may give data in smallish chunks, esp. for remote sources.  Hence, this loop.
118             int  bytesRead = fInputStream.read(fRawInput, fRawLength, remainingLength);
119             if (bytesRead <= 0) {
120                  break;
121             }
122             fRawLength += bytesRead;
123             remainingLength -= bytesRead;
124         }
125         fInputStream.reset();
126 
127         return this;
128     }
129 
130 
131     /**
132      * Return the charset that best matches the supplied input data.
133      *
134      * Note though, that because the detection
135      * only looks at the start of the input data,
136      * there is a possibility that the returned charset will fail to handle
137      * the full set of input data.
138      * <p>
139      * Raise an exception if
140      *  <ul>
141      *    <li>no charset appears to match the data.</li>
142      *    <li>no input text has been provided</li>
143      *  </ul>
144      *
145      * @return a CharsetMatch object representing the best matching charset, or
146      *         <code>null</code> if there are no matches.
147      */
detect()148     public CharsetMatch detect() {
149 //   TODO:  A better implementation would be to copy the detect loop from
150 //          detectAll(), and cut it short as soon as a match with a high confidence
151 //          is found.  This is something to be done later, after things are otherwise
152 //          working.
153         CharsetMatch matches[] = detectAll();
154 
155         if (matches == null || matches.length == 0) {
156             return null;
157         }
158 
159         return matches[0];
160      }
161 
162     /**
163      *  Return an array of all charsets that appear to be plausible
164      *  matches with the input data.  The array is ordered with the
165      *  best quality match first.
166      * <p>
167      * Raise an exception if
168      *  <ul>
169      *    <li>no charsets appear to match the input data.</li>
170      *    <li>no input text has been provided</li>
171      *  </ul>
172      *
173      * @return An array of CharsetMatch objects representing possibly matching charsets.
174      */
detectAll()175     public CharsetMatch[] detectAll() {
176         ArrayList<CharsetMatch>         matches = new ArrayList<CharsetMatch>();
177 
178         MungeInput();  // Strip html markup, collect byte stats.
179 
180         //  Iterate over all possible charsets, remember all that
181         //    give a match quality > 0.
182         for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) {
183             CSRecognizerInfo rcinfo = ALL_CS_RECOGNIZERS.get(i);
184             boolean active = (fEnabledRecognizers != null) ? fEnabledRecognizers[i] : rcinfo.isDefaultEnabled;
185             if (active) {
186                 CharsetMatch m = rcinfo.recognizer.match(this);
187                 if (m != null) {
188                     matches.add(m);
189                 }
190             }
191         }
192         Collections.sort(matches);      // CharsetMatch compares on confidence
193         Collections.reverse(matches);   //  Put best match first.
194         CharsetMatch [] resultArray = new CharsetMatch[matches.size()];
195         resultArray = matches.toArray(resultArray);
196         return resultArray;
197     }
198 
199 
200     /**
201      * Autodetect the charset of an inputStream, and return a Java Reader
202      * to access the converted input data.
203      * <p>
204      * This is a convenience method that is equivalent to
205      *   <code>this.setDeclaredEncoding(declaredEncoding).setText(in).detect().getReader();</code>
206      * <p>
207      *   For the input stream that supplies the character data, markSupported()
208      *   must be true; the  charset detection will read a small amount of data,
209      *   then return the stream to its original position via
210      *   the InputStream.reset() operation.  The exact amount that will
211      *    be read depends on the characteristics of the data itself.
212      *<p>
213      * Raise an exception if no charsets appear to match the input data.
214      *
215      * @param in The source of the byte data in the unknown charset.
216      *
217      * @param declaredEncoding  A declared encoding for the data, if available,
218      *           or null or an empty string if none is available.
219      */
getReader(InputStream in, String declaredEncoding)220     public Reader getReader(InputStream in, String declaredEncoding) {
221         fDeclaredEncoding = declaredEncoding;
222 
223         try {
224             setText(in);
225 
226             CharsetMatch match = detect();
227 
228             if (match == null) {
229                 return null;
230             }
231 
232             return match.getReader();
233         } catch (IOException e) {
234             return null;
235         }
236     }
237 
238     /**
239      * Autodetect the charset of an inputStream, and return a String
240      * containing the converted input data.
241      * <p>
242      * This is a convenience method that is equivalent to
243      *   <code>this.setDeclaredEncoding(declaredEncoding).setText(in).detect().getString();</code>
244      *<p>
245      * Raise an exception if no charsets appear to match the input data.
246      *
247      * @param in The source of the byte data in the unknown charset.
248      *
249      * @param declaredEncoding  A declared encoding for the data, if available,
250      *           or null or an empty string if none is available.
251      */
getString(byte[] in, String declaredEncoding)252     public String getString(byte[] in, String declaredEncoding)
253     {
254         fDeclaredEncoding = declaredEncoding;
255 
256         try {
257             setText(in);
258 
259             CharsetMatch match = detect();
260 
261             if (match == null) {
262                 return null;
263             }
264 
265             return match.getString(-1);
266         } catch (IOException e) {
267             return null;
268         }
269     }
270 
271 
272     /**
273      * Get the names of all charsets supported by <code>CharsetDetector</code> class.
274      * <p>
275      * <b>Note:</b> Multiple different charset encodings in a same family may use
276      * a single shared name in this implementation. For example, this method returns
277      * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
278      * (Windows Latin 1). However, actual detection result could be "windows-1252"
279      * when the input data matches Latin 1 code points with any points only available
280      * in "windows-1252".
281      *
282      * @return an array of the names of all charsets supported by
283      * <code>CharsetDetector</code> class.
284      */
getAllDetectableCharsets()285     public static String[] getAllDetectableCharsets() {
286         String[] allCharsetNames = new String[ALL_CS_RECOGNIZERS.size()];
287         for (int i = 0; i < allCharsetNames.length; i++) {
288             allCharsetNames[i] = ALL_CS_RECOGNIZERS.get(i).recognizer.getName();
289         }
290         return allCharsetNames;
291     }
292 
293     /**
294      * Test whether or not input filtering is enabled.
295      *
296      * @return <code>true</code> if input text will be filtered.
297      *
298      * @see #enableInputFilter
299      */
inputFilterEnabled()300     public boolean inputFilterEnabled()
301     {
302         return fStripTags;
303     }
304 
305     /**
306      * Enable filtering of input text. If filtering is enabled,
307      * text within angle brackets ("&lt;" and "&gt;") will be removed
308      * before detection.
309      *
310      * @param filter <code>true</code> to enable input text filtering.
311      *
312      * @return The previous setting.
313      */
enableInputFilter(boolean filter)314     public boolean enableInputFilter(boolean filter)
315     {
316         boolean previous = fStripTags;
317 
318         fStripTags = filter;
319 
320         return previous;
321     }
322 
323     /*
324      *  MungeInput - after getting a set of raw input data to be analyzed, preprocess
325      *               it by removing what appears to be html markup.
326      */
MungeInput()327     private void MungeInput() {
328         int srci = 0;
329         int dsti = 0;
330         byte b;
331         boolean  inMarkup = false;
332         int      openTags = 0;
333         int      badTags  = 0;
334 
335         //
336         //  html / xml markup stripping.
337         //     quick and dirty, not 100% accurate, but hopefully good enough, statistically.
338         //     discard everything within < brackets >
339         //     Count how many total '<' and illegal (nested) '<' occur, so we can make some
340         //     guess as to whether the input was actually marked up at all.
341         if (fStripTags) {
342             for (srci = 0; srci < fRawLength && dsti < fInputBytes.length; srci++) {
343                 b = fRawInput[srci];
344                 if (b == (byte)'<') {
345                     if (inMarkup) {
346                         badTags++;
347                     }
348                     inMarkup = true;
349                     openTags++;
350                 }
351 
352                 if (! inMarkup) {
353                     fInputBytes[dsti++] = b;
354                 }
355 
356                 if (b == (byte)'>') {
357                     inMarkup = false;
358                 }
359             }
360 
361             fInputLen = dsti;
362         }
363 
364         //
365         //  If it looks like this input wasn't marked up, or if it looks like it's
366         //    essentially nothing but markup abandon the markup stripping.
367         //    Detection will have to work on the unstripped input.
368         //
369         if (openTags<5 || openTags/5 < badTags ||
370                 (fInputLen < 100 && fRawLength>600)) {
371             int limit = fRawLength;
372 
373             if (limit > kBufSize) {
374                 limit = kBufSize;
375             }
376 
377             for (srci=0; srci<limit; srci++) {
378                 fInputBytes[srci] = fRawInput[srci];
379             }
380             fInputLen = srci;
381         }
382 
383         //
384         // Tally up the byte occurence statistics.
385         //   These are available for use by the various detectors.
386         //
387         Arrays.fill(fByteStats, (short)0);
388         for (srci=0; srci<fInputLen; srci++) {
389             int val = fInputBytes[srci] & 0x00ff;
390             fByteStats[val]++;
391         }
392 
393         fC1Bytes = false;
394         for (int i = 0x80; i <= 0x9F; i += 1) {
395             if (fByteStats[i] != 0) {
396                 fC1Bytes = true;
397                 break;
398             }
399         }
400      }
401 
402     /*
403      *  The following items are accessed by individual CharsetRecongizers during
404      *     the recognition process
405      *
406      */
407     byte[]      fInputBytes =       // The text to be checked.  Markup will have been
408                    new byte[kBufSize];  //   removed if appropriate.
409 
410     int         fInputLen;          // Length of the byte data in fInputBytes.
411 
412     short       fByteStats[] =      // byte frequency statistics for the input text.
413                    new short[256];  //   Value is percent, not absolute.
414                                     //   Value is rounded up, so zero really means zero occurences.
415 
416     boolean     fC1Bytes =          // True if any bytes in the range 0x80 - 0x9F are in the input;
417                    false;
418 
419     String      fDeclaredEncoding;
420 
421 
422     byte[]               fRawInput;     // Original, untouched input bytes.
423                                         //  If user gave us a byte array, this is it.
424                                         //  If user gave us a stream, it's read to a
425                                         //  buffer here.
426     int                  fRawLength;    // Length of data in fRawInput array.
427 
428     InputStream          fInputStream;  // User's input stream, or null if the user
429                                         //   gave us a byte array.
430 
431     //
432     //  Stuff private to CharsetDetector
433     //
434     private boolean      fStripTags =   // If true, setText() will strip tags from input text.
435                            false;
436 
437     private boolean[]    fEnabledRecognizers;   // If not null, active set of charset recognizers had
438                                                 // been changed from the default. The array index is
439                                                 // corresponding to ALL_RECOGNIZER. See setDetectableCharset().
440 
441     private static class CSRecognizerInfo {
442         CharsetRecognizer recognizer;
443         boolean isDefaultEnabled;
444 
CSRecognizerInfo(CharsetRecognizer recognizer, boolean isDefaultEnabled)445         CSRecognizerInfo(CharsetRecognizer recognizer, boolean isDefaultEnabled) {
446             this.recognizer = recognizer;
447             this.isDefaultEnabled = isDefaultEnabled;
448         }
449     }
450 
451     /*
452      * List of recognizers for all charsets known to the implementation.
453      */
454     private static final List<CSRecognizerInfo> ALL_CS_RECOGNIZERS;
455 
456     static {
457         List<CSRecognizerInfo> list = new ArrayList<CSRecognizerInfo>();
458 
list.add(new CSRecognizerInfo(new CharsetRecog_UTF8(), true))459         list.add(new CSRecognizerInfo(new CharsetRecog_UTF8(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_BE(), true))460         list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_BE(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_LE(), true))461         list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_16_LE(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_BE(), true))462         list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_BE(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_LE(), true))463         list.add(new CSRecognizerInfo(new CharsetRecog_Unicode.CharsetRecog_UTF_32_LE(), true));
464 
list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_sjis(), true))465         list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_sjis(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022JP(), true))466         list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022JP(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022CN(), true))467         list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022CN(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022KR(), true))468         list.add(new CSRecognizerInfo(new CharsetRecog_2022.CharsetRecog_2022KR(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_gb_18030(), true))469         list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_gb_18030(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_jp(), true))470         list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_jp(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_kr(), true))471         list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_euc.CharsetRecog_euc_kr(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_big5(), true))472         list.add(new CSRecognizerInfo(new CharsetRecog_mbcs.CharsetRecog_big5(), true));
473 
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_1(), true))474         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_1(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_2(), true))475         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_2(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru(), true))476         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_5_ru(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar(), true))477         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_6_ar(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_7_el(), true))478         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_7_el(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he(), true))479         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_I_he(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_he(), true))480         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_8_he(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1251(), true))481         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1251(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1256(), true))482         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_windows_1256(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_KOI8_R(), true))483         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_KOI8_R(), true));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_9_tr(), true))484         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_8859_9_tr(), true));
485 
486         // IBM 420/424 recognizers are disabled by default
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_rtl(), false))487         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_rtl(), false));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_ltr(), false))488         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM424_he_ltr(), false));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_rtl(), false))489         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_rtl(), false));
list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_ltr(), false))490         list.add(new CSRecognizerInfo(new CharsetRecog_sbcs.CharsetRecog_IBM420_ar_ltr(), false));
491 
492         ALL_CS_RECOGNIZERS = Collections.unmodifiableList(list);
493     }
494 
495     /**
496      * Get the names of charsets that can be recognized by this CharsetDetector instance.
497      *
498      * @return an array of the names of charsets that can be recognized by this CharsetDetector
499      * instance.
500      *
501      * @deprecated This API is ICU internal only.
502      * @hide draft / provisional / internal are hidden on OHOS
503      */
504     @Deprecated
getDetectableCharsets()505     public String[] getDetectableCharsets() {
506         List<String> csnames = new ArrayList<String>(ALL_CS_RECOGNIZERS.size());
507         for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) {
508             CSRecognizerInfo rcinfo = ALL_CS_RECOGNIZERS.get(i);
509             boolean active = (fEnabledRecognizers == null) ? rcinfo.isDefaultEnabled : fEnabledRecognizers[i];
510             if (active) {
511                 csnames.add(rcinfo.recognizer.getName());
512             }
513         }
514         return csnames.toArray(new String[csnames.size()]);
515     }
516 
517     /**
518      * Enable or disable individual charset encoding.
519      * A name of charset encoding must be included in the names returned by
520      * {@link #getAllDetectableCharsets()}.
521      *
522      * @param encoding the name of charset encoding.
523      * @param enabled <code>true</code> to enable, or <code>false</code> to disable the
524      * charset encoding.
525      * @return A reference to this <code>CharsetDetector</code>.
526      * @throws IllegalArgumentException when the name of charset encoding is
527      * not supported.
528      *
529      * @deprecated This API is ICU internal only.
530      * @hide draft / provisional / internal are hidden on OHOS
531      */
532     @Deprecated
setDetectableCharset(String encoding, boolean enabled)533     public CharsetDetector setDetectableCharset(String encoding, boolean enabled) {
534         int modIdx = -1;
535         boolean isDefaultVal = false;
536         for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) {
537             CSRecognizerInfo csrinfo = ALL_CS_RECOGNIZERS.get(i);
538             if (csrinfo.recognizer.getName().equals(encoding)) {
539                 modIdx = i;
540                 isDefaultVal = (csrinfo.isDefaultEnabled == enabled);
541                 break;
542             }
543         }
544         if (modIdx < 0) {
545             // No matching encoding found
546             throw new IllegalArgumentException("Invalid encoding: " + "\"" + encoding + "\"");
547         }
548 
549         if (fEnabledRecognizers == null && !isDefaultVal) {
550             // Create an array storing the non default setting
551             fEnabledRecognizers = new boolean[ALL_CS_RECOGNIZERS.size()];
552 
553             // Initialize the array with default info
554             for (int i = 0; i < ALL_CS_RECOGNIZERS.size(); i++) {
555                 fEnabledRecognizers[i] = ALL_CS_RECOGNIZERS.get(i).isDefaultEnabled;
556             }
557         }
558 
559         if (fEnabledRecognizers != null) {
560             fEnabledRecognizers[modIdx] = enabled;
561         }
562 
563         return this;
564     }
565 }
566