• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4 *******************************************************************************
5 * Copyright (C) 2006-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 package com.ibm.icu.charset;
11 
12 import java.io.IOException;
13 import java.nio.charset.Charset;
14 import java.nio.charset.UnsupportedCharsetException;
15 import java.nio.charset.spi.CharsetProvider;
16 import java.util.Collections;
17 import java.util.Iterator;
18 import java.util.LinkedList;
19 import java.util.List;
20 
21 import com.ibm.icu.impl.InvalidFormatException;
22 
23 
24 /**
25  * A concrete subclass of CharsetProvider for loading and providing charset converters
26  * in ICU.
27  * @stable ICU 3.6
28  */
29 public final class CharsetProviderICU extends CharsetProvider{
30     /**
31      * List of available ICU Charsets, empty during static initialization.
32      * Not a Set or Map, so that we can add different Charset objects with the same name(),
33      * which means that they are .equals(). See ICU ticket #11493.
34      */
35     private static List<Charset> icuCharsets = Collections.<Charset>emptyList();
36 
37     /**
38      * Default constructor
39      * @stable ICU 3.6
40      */
CharsetProviderICU()41     public CharsetProviderICU() {
42     }
43 
44     /**
45      * Constructs a Charset for the given charset name.
46      * Implements the abstract method of super class.
47      * @param charsetName charset name
48      * @return Charset object for the given charset name, null if unsupported
49      * @stable ICU 3.6
50      */
51     @Override
charsetForName(String charsetName)52     public final Charset charsetForName(String charsetName){
53         try{
54             // extract the options from the charset name
55             String optionsString = "";
56             if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) {
57                 /* Remove and save the swap lfnl option string portion of the charset name. */
58                 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
59                 charsetName = charsetName.substring(0, charsetName.length() - optionsString.length());
60             }
61             // get the canonical name
62             String icuCanonicalName = getICUCanonicalName(charsetName);
63 
64             // create the converter object and return it
65             if(icuCanonicalName==null || icuCanonicalName.length()==0){
66                 // Try the original name, may be something added and not in the alias table.
67                 // Will get an unsupported encoding exception if it doesn't work.
68                 icuCanonicalName = charsetName;
69             }
70             return getCharset(icuCanonicalName, optionsString);
71         }catch(UnsupportedCharsetException ex){
72         }catch(IOException ex){
73         }
74         return null;
75     }
76 
77     /**
78      * Constructs a charset for the given ICU conversion table from the specified class path.
79      * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
80      * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
81      * Conversion tables can be made with ICU4C's makeconv tool.
82      * This function allows you to allows you to load user defined conversion
83      * tables that are outside of ICU's core data.
84      * @param charsetName The name of the charset conversion table.
85      * @param classPath The class path that contain the conversion table.
86      * @return charset object for the given charset name, null if unsupported
87      * @stable ICU 3.8
88      */
charsetForName(String charsetName, String classPath)89     public final Charset charsetForName(String charsetName, String classPath) {
90         return charsetForName(charsetName, classPath, null);
91     }
92 
93     /**
94      * Constructs a charset for the given ICU conversion table from the specified class path.
95      * This function is similar to {@link #charsetForName(String, String)}.
96      * @param charsetName The name of the charset conversion table.
97      * @param classPath The class path that contain the conversion table.
98      * @param loader the class object from which to load the charset conversion table
99      * @return charset object for the given charset name, null if unsupported
100      * @stable ICU 3.8
101      */
charsetForName(String charsetName, String classPath, ClassLoader loader)102     public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
103         CharsetMBCS cs = null;
104         try {
105              cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
106         } catch (InvalidFormatException e) {
107             // return null;
108         }
109         return cs;
110     }
111 
112     /**
113      * Gets the canonical name of the converter as defined by Java
114      * @param enc converter name
115      * @return canonical name of the converter
116      * @internal
117      * @deprecated This API is ICU internal only.
118      */
119      @Deprecated
getICUCanonicalName(String enc)120      public static final String getICUCanonicalName(String enc)
121                                 throws UnsupportedCharsetException{
122         String canonicalName = null;
123         String ret = null;
124         try{
125             if(enc!=null){
126                  if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
127                     ret = canonicalName;
128                 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
129                     ret = canonicalName;
130                 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
131                     /* we have some aliases in the form x-blah .. match those */
132                     ret = canonicalName;
133                 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
134                     ret = canonicalName;
135                 }*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){
136                     /* TODO: Match with getJavaCanonicalName method */
137                     /*
138                     char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
139                     strcpy(temp, encName+2);
140                     */
141                     // Remove the 'x-' and get the ICU canonical name
142                     if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) {
143                         ret = canonicalName;
144                     } else {
145                         ret = "";
146                     }
147 
148                 }else{
149                     /* unsupported encoding */
150                    ret = "";
151                 }
152             }
153             return ret;
154         }catch(IOException ex){
155             throw new UnsupportedCharsetException(enc);
156         }
157     }
getCharset(String icuCanonicalName, String optionsString)158     private static final Charset getCharset(String icuCanonicalName, String optionsString)
159             throws IOException {
160        String[] aliases = getAliases(icuCanonicalName);
161        String canonicalName = getJavaCanonicalName(icuCanonicalName);
162 
163        /* Concat the option string to the icuCanonicalName so that the options can be handled properly
164         * by the actual charset.
165         */
166        return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases));
167     }
168     /**
169      * Gets the canonical name of the converter as defined by Java
170      * @param charsetName converter name
171      * @return canonical name of the converter
172      * @internal
173      * @deprecated This API is ICU internal only.
174      */
175     @Deprecated
getJavaCanonicalName(String charsetName)176     public static String getJavaCanonicalName(String charsetName){
177         /*
178         If a charset listed in the IANA Charset Registry is supported by an implementation
179         of the Java platform then its canonical name must be the name listed in the registry.
180         Many charsets are given more than one name in the registry, in which case the registry
181         identifies one of the names as MIME-preferred. If a charset has more than one registry
182         name then its canonical name must be the MIME-preferred name and the other names in
183         the registry must be valid aliases. If a supported charset is not listed in the IANA
184         registry then its canonical name must begin with one of the strings "X-" or "x-".
185         */
186         if(charsetName==null ){
187             return null;
188         }
189         try{
190             String cName = null;
191             /* find out the alias with MIME tag */
192             if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
193             /* find out the alias with IANA tag */
194             }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
195             }else {
196                 /*
197                     check to see if an alias already exists with x- prefix, if yes then
198                     make that the canonical name
199                 */
200                 int aliasNum = UConverterAlias.countAliases(charsetName);
201                 String name;
202                 for(int i=0;i<aliasNum;i++){
203                     name = UConverterAlias.getAlias(charsetName, i);
204                     if(name!=null && name.indexOf("x-")==0){
205                         cName = name;
206                         break;
207                     }
208                 }
209                 /* last resort just append x- to any of the alias and
210                 make it the canonical name */
211                 if((cName==null || cName.length()==0)){
212                     name = UConverterAlias.getStandardName(charsetName, "UTR22");
213                     if(name==null && charsetName.indexOf(",")!=-1){
214                         name = UConverterAlias.getAlias(charsetName, 1);
215                     }
216                     /* if there is no UTR22 canonical name .. then just return itself*/
217                     if(name==null){
218                         name = charsetName;
219                     }
220                     cName = "x-"+ name;
221                 }
222             }
223             return cName;
224         }catch (IOException ex){
225 
226         }
227         return null;
228      }
229 
230     /**
231      * Gets the aliases associated with the converter name
232      * @param encName converter name
233      * @return converter names as elements in an object array
234      * @internal
235      * @deprecated This API is ICU internal only.
236      */
237     @Deprecated
getAliases(String encName)238     private static final String[] getAliases(String encName)throws IOException{
239         String[] ret = null;
240         int aliasNum = 0;
241         int i=0;
242         int j=0;
243         String aliasArray[/*50*/] = new String[50];
244 
245         if(encName != null){
246             aliasNum = UConverterAlias.countAliases(encName);
247             for(i=0,j=0;i<aliasNum;i++){
248                 String name = UConverterAlias.getAlias(encName,i);
249                 if(name.indexOf(',')==-1){
250                     aliasArray[j++]= name;
251                 }
252             }
253             ret = new String[j];
254             for(;--j>=0;) {
255                 ret[j] = aliasArray[j];
256             }
257 
258         }
259         return (ret);
260 
261     }
262 
263     /**
264      * Lazy-init the icuCharsets list.
265      * Could be done during static initialization if constructing all of the Charsets
266      * were cheap enough. See ICU ticket #11481.
267      */
loadAvailableICUCharsets()268     private static final synchronized void loadAvailableICUCharsets() {
269         if (!icuCharsets.isEmpty()) {
270             return;
271         }
272         List<Charset> icucs = new LinkedList<Charset>();
273         int num = UConverterAlias.countAvailable();
274         for (int i = 0; i < num; ++i) {
275             String name = UConverterAlias.getAvailableName(i);
276             try {
277                 Charset cs = getCharset(name, "");
278                 icucs.add(cs);
279             } catch(UnsupportedCharsetException ex) {
280             } catch(IOException e) {
281             }
282             // add only charsets that can be created!
283         }
284         // Unmodifiable so that charsets().next().remove() cannot change it.
285         icuCharsets = Collections.unmodifiableList(icucs);
286     }
287 
288     /**
289      * Returns an iterator for the available ICU Charsets.
290      * Implements the abstract method of super class.
291      * @return the Charset iterator
292      * @stable ICU 3.6
293      */
294     @Override
charsets()295     public final Iterator<Charset> charsets() {
296         loadAvailableICUCharsets();
297         return icuCharsets.iterator();
298     }
299 
300     /**
301      * Gets the canonical names of available ICU converters
302      * @return array of available converter names
303      * @internal
304      * @deprecated This API is ICU internal only.
305      */
306     @Deprecated
getAvailableNames()307      public static final String[] getAvailableNames() {
308         loadAvailableICUCharsets();
309         String[] names = new String[icuCharsets.size()];
310         int i = 0;
311         for (Charset cs : icuCharsets) {
312             names[i++] = cs.name();
313         }
314         return names;
315     }
316 
317     /**
318      * Return all names available
319      * @return String[] an array of all available names
320      * @internal
321      * @deprecated This API is ICU internal only.
322      */
323     @Deprecated
getAllNames()324      public static final String[] getAllNames(){
325         int num = UConverterAlias.countAvailable();
326         String[] names = new String[num];
327         for(int i=0;i<num;i++) {
328             names[i] = UConverterAlias.getAvailableName(i);
329         }
330         return names;
331     }
332 }
333