• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 2005-2016, International Business Machines Corporation and    *
7 * others. All Rights Reserved.                                                *
8 *******************************************************************************
9 */
10 package ohos.global.icu.text;
11 
12 import java.io.ByteArrayInputStream;
13 import java.io.IOException;
14 import java.io.InputStream;
15 import java.io.InputStreamReader;
16 import java.io.Reader;
17 
18 
19 /**
20  * This class represents a charset that has been identified by a CharsetDetector
21  * as a possible encoding for a set of input data.  From an instance of this
22  * class, you can ask for a confidence level in the charset identification,
23  * or for Java Reader or String to access the original byte data in Unicode form.
24  * <p>
25  * Instances of this class are created only by CharsetDetectors.
26  * <p>
27  * Note:  this class has a natural ordering that is inconsistent with equals.
28  *        The natural ordering is based on the match confidence value.
29  *
30  * @hide exposed on OHOS
31  */
32 public class CharsetMatch implements Comparable<CharsetMatch> {
33 
34 
35     /**
36      * Create a java.io.Reader for reading the Unicode character data corresponding
37      * to the original byte data supplied to the Charset detect operation.
38      * <p>
39      * CAUTION:  if the source of the byte data was an InputStream, a Reader
40      * can be created for only one matching char set using this method.  If more
41      * than one charset needs to be tried, the caller will need to reset
42      * the InputStream and create InputStreamReaders itself, based on the charset name.
43      *
44      * @return the Reader for the Unicode character data.
45      */
getReader()46     public Reader getReader() {
47         InputStream inputStream = fInputStream;
48 
49         if (inputStream == null) {
50             inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength);
51         }
52 
53         try {
54             inputStream.reset();
55             return new InputStreamReader(inputStream, getName());
56         } catch (IOException e) {
57             return null;
58         }
59     }
60 
61     /**
62      * Create a Java String from Unicode character data corresponding
63      * to the original byte data supplied to the Charset detect operation.
64      *
65      * @return a String created from the converted input data.
66      */
getString()67     public String getString()  throws java.io.IOException {
68         return getString(-1);
69 
70     }
71 
72     /**
73      * Create a Java String from Unicode character data corresponding
74      * to the original byte data supplied to the Charset detect operation.
75      * The length of the returned string is limited to the specified size;
76      * the string will be trunctated to this length if necessary.  A limit value of
77      * zero or less is ignored, and treated as no limit.
78      *
79      * @param maxLength The maximium length of the String to be created when the
80      *                  source of the data is an input stream, or -1 for
81      *                  unlimited length.
82      * @return a String created from the converted input data.
83      */
getString(int maxLength)84     public String getString(int maxLength) throws java.io.IOException {
85         String result = null;
86         if (fInputStream != null) {
87             StringBuilder sb = new StringBuilder();
88             char[] buffer = new char[1024];
89             Reader reader = getReader();
90             int max = maxLength < 0? Integer.MAX_VALUE : maxLength;
91             int bytesRead = 0;
92 
93             while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) {
94                 sb.append(buffer, 0, bytesRead);
95                 max -= bytesRead;
96             }
97 
98             reader.close();
99 
100             return sb.toString();
101         } else {
102             String name = getName();
103             /*
104              * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot
105              * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr'
106              * should be stripped off before creating the string.
107              */
108             int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl");
109             if (startSuffix > 0) {
110                 name = name.substring(0, startSuffix);
111             }
112             result = new String(fRawInput, name);
113         }
114         return result;
115 
116     }
117 
118     /**
119      * Get an indication of the confidence in the charset detected.
120      * Confidence values range from 0-100, with larger numbers indicating
121      * a better match of the input data to the characteristics of the
122      * charset.
123      *
124      * @return the confidence in the charset match
125      */
getConfidence()126     public int getConfidence() {
127         return fConfidence;
128     }
129 
130     /**
131      * Get the name of the detected charset.
132      * The name will be one that can be used with other APIs on the
133      * platform that accept charset names.  It is the "Canonical name"
134      * as defined by the class java.nio.charset.Charset; for
135      * charsets that are registered with the IANA charset registry,
136      * this is the MIME-preferred registerd name.
137      *
138      * @see java.nio.charset.Charset
139      * @see java.io.InputStreamReader
140      *
141      * @return The name of the charset.
142      */
getName()143     public String getName() {
144         return fCharsetName;
145     }
146 
147     /**
148      * Get the ISO code for the language of the detected charset.
149      *
150      * @return The ISO code for the language or <code>null</code> if the language cannot be determined.
151      */
getLanguage()152     public String getLanguage() {
153         return fLang;
154     }
155 
156     /**
157      * Compare to other CharsetMatch objects.
158      * Comparison is based on the match confidence value, which
159      *   allows CharsetDetector.detectAll() to order its results.
160      *
161      * @param other the CharsetMatch object to compare against.
162      * @return  a negative integer, zero, or a positive integer as the
163      *          confidence level of this CharsetMatch
164      *          is less than, equal to, or greater than that of
165      *          the argument.
166      * @throws ClassCastException if the argument is not a CharsetMatch.
167      */
168     @Override
compareTo(CharsetMatch other)169     public int compareTo (CharsetMatch other) {
170         int compareResult = 0;
171         if (this.fConfidence > other.fConfidence) {
172             compareResult = 1;
173         } else if (this.fConfidence < other.fConfidence) {
174             compareResult = -1;
175         }
176         return compareResult;
177     }
178 
179     /*
180      *  Constructor.  Implementation internal
181      */
CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf)182     CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) {
183         fConfidence = conf;
184 
185         // The references to the original application input data must be copied out
186         //   of the charset recognizer to here, in case the application resets the
187         //   recognizer before using this CharsetMatch.
188         if (det.fInputStream == null) {
189             // We only want the existing input byte data if it came straight from the user,
190             //   not if is just the head of a stream.
191             fRawInput    = det.fRawInput;
192             fRawLength   = det.fRawLength;
193         }
194         fInputStream = det.fInputStream;
195         fCharsetName = rec.getName();
196         fLang = rec.getLanguage();
197     }
198 
199     /*
200      *  Constructor.  Implementation internal
201      */
CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang)202     CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) {
203         fConfidence = conf;
204 
205         // The references to the original application input data must be copied out
206         //   of the charset recognizer to here, in case the application resets the
207         //   recognizer before using this CharsetMatch.
208         if (det.fInputStream == null) {
209             // We only want the existing input byte data if it came straight from the user,
210             //   not if is just the head of a stream.
211             fRawInput    = det.fRawInput;
212             fRawLength   = det.fRawLength;
213         }
214         fInputStream = det.fInputStream;
215         fCharsetName = csName;
216         fLang = lang;
217     }
218 
219 
220     //
221     //   Private Data
222     //
223     private int                 fConfidence;
224     private byte[]              fRawInput = null;     // Original, untouched input bytes.
225                                                       //  If user gave us a byte array, this is it.
226     private int                 fRawLength;           // Length of data in fRawInput array.
227 
228     private InputStream         fInputStream = null;  // User's input stream, or null if the user
229                                                       //   gave us a byte array.
230 
231     private String              fCharsetName;         // The name of the charset this CharsetMatch
232                                                       //   represents.  Filled in by the recognizer.
233     private String              fLang;                // The language, if one was determined by
234                                                       //   the recognizer during the detect operation.
235 }
236