• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2 *******************************************************************************
3 * Copyright (C) 2006-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 package com.ibm.icu.charset;
9 
10 import java.io.IOException;
11 import java.nio.charset.Charset;
12 import java.nio.charset.UnsupportedCharsetException;
13 import java.nio.charset.spi.CharsetProvider;
14 import java.util.Collections;
15 import java.util.Iterator;
16 import java.util.LinkedList;
17 import java.util.List;
18 
19 import com.ibm.icu.impl.InvalidFormatException;
20 
21 
22 /**
23  * A concrete subclass of CharsetProvider for loading and providing charset converters
24  * in ICU.
25  * @stable ICU 3.6
26  */
27 public final class CharsetProviderICU extends CharsetProvider{
28     /**
29      * List of available ICU Charsets, empty during static initialization.
30      * Not a Set or Map, so that we can add different Charset objects with the same name(),
31      * which means that they are .equals(). See ICU ticket #11493.
32      */
33     private static List<Charset> icuCharsets = Collections.<Charset>emptyList();
34 
35     /**
36      * Default constructor
37      * @stable ICU 3.6
38      */
CharsetProviderICU()39     public CharsetProviderICU() {
40     }
41 
42     /**
43      * Constructs a Charset for the given charset name.
44      * Implements the abstract method of super class.
45      * @param charsetName charset name
46      * @return Charset object for the given charset name, null if unsupported
47      * @stable ICU 3.6
48      */
charsetForName(String charsetName)49     public final Charset charsetForName(String charsetName){
50         try{
51             // extract the options from the charset name
52             String optionsString = "";
53             if (charsetName.endsWith(UConverterConstants.OPTION_SWAP_LFNL_STRING)) {
54                 /* Remove and save the swap lfnl option string portion of the charset name. */
55                 optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
56                 charsetName = charsetName.substring(0, charsetName.length() - optionsString.length());
57             }
58             // get the canonical name
59             String icuCanonicalName = getICUCanonicalName(charsetName);
60 
61             // create the converter object and return it
62             if(icuCanonicalName==null || icuCanonicalName.length()==0){
63                 // Try the original name, may be something added and not in the alias table.
64                 // Will get an unsupported encoding exception if it doesn't work.
65                 icuCanonicalName = charsetName;
66             }
67             return getCharset(icuCanonicalName, optionsString);
68         }catch(UnsupportedCharsetException ex){
69         }catch(IOException ex){
70         }
71         return null;
72     }
73 
74     /**
75      * Constructs a charset for the given ICU conversion table from the specified class path.
76      * Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
77      * In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
78      * Conversion tables can be made with ICU4C's makeconv tool.
79      * This function allows you to allows you to load user defined conversion
80      * tables that are outside of ICU's core data.
81      * @param charsetName The name of the charset conversion table.
82      * @param classPath The class path that contain the conversion table.
83      * @return charset object for the given charset name, null if unsupported
84      * @stable ICU 3.8
85      */
charsetForName(String charsetName, String classPath)86     public final Charset charsetForName(String charsetName, String classPath) {
87         return charsetForName(charsetName, classPath, null);
88     }
89 
90     /**
91      * Constructs a charset for the given ICU conversion table from the specified class path.
92      * This function is similar to {@link #charsetForName(String, String)}.
93      * @param charsetName The name of the charset conversion table.
94      * @param classPath The class path that contain the conversion table.
95      * @param loader the class object from which to load the charset conversion table
96      * @return charset object for the given charset name, null if unsupported
97      * @stable ICU 3.8
98      */
charsetForName(String charsetName, String classPath, ClassLoader loader)99     public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
100         CharsetMBCS cs = null;
101         try {
102              cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
103         } catch (InvalidFormatException e) {
104             // return null;
105         }
106         return cs;
107     }
108 
109     /**
110      * Gets the canonical name of the converter as defined by Java
111      * @param enc converter name
112      * @return canonical name of the converter
113      * @internal
114      * @deprecated This API is ICU internal only.
115      */
116      @Deprecated
getICUCanonicalName(String enc)117      public static final String getICUCanonicalName(String enc)
118                                 throws UnsupportedCharsetException{
119         String canonicalName = null;
120         String ret = null;
121         try{
122             if(enc!=null){
123                  if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
124                     ret = canonicalName;
125                 } else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
126                     ret = canonicalName;
127                 } else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
128                     /* we have some aliases in the form x-blah .. match those */
129                     ret = canonicalName;
130                 }/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
131                     ret = canonicalName;
132                 }*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){
133                     /* TODO: Match with getJavaCanonicalName method */
134                     /*
135                     char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
136                     strcpy(temp, encName+2);
137                     */
138                     // Remove the 'x-' and get the ICU canonical name
139                     if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) {
140                         ret = canonicalName;
141                     } else {
142                         ret = "";
143                     }
144 
145                 }else{
146                     /* unsupported encoding */
147                    ret = "";
148                 }
149             }
150             return ret;
151         }catch(IOException ex){
152             throw new UnsupportedCharsetException(enc);
153         }
154     }
getCharset(String icuCanonicalName, String optionsString)155     private static final Charset getCharset(String icuCanonicalName, String optionsString)
156             throws IOException {
157        String[] aliases = getAliases(icuCanonicalName);
158        String canonicalName = getJavaCanonicalName(icuCanonicalName);
159 
160        /* Concat the option string to the icuCanonicalName so that the options can be handled properly
161         * by the actual charset.
162         */
163        return (CharsetICU.getCharset(icuCanonicalName + optionsString, canonicalName, aliases));
164     }
165     /**
166      * Gets the canonical name of the converter as defined by Java
167      * @param charsetName converter name
168      * @return canonical name of the converter
169      * @internal
170      * @deprecated This API is ICU internal only.
171      */
172     @Deprecated
getJavaCanonicalName(String charsetName)173     public static String getJavaCanonicalName(String charsetName){
174         /*
175         If a charset listed in the IANA Charset Registry is supported by an implementation
176         of the Java platform then its canonical name must be the name listed in the registry.
177         Many charsets are given more than one name in the registry, in which case the registry
178         identifies one of the names as MIME-preferred. If a charset has more than one registry
179         name then its canonical name must be the MIME-preferred name and the other names in
180         the registry must be valid aliases. If a supported charset is not listed in the IANA
181         registry then its canonical name must begin with one of the strings "X-" or "x-".
182         */
183         if(charsetName==null ){
184             return null;
185         }
186         try{
187             String cName = null;
188             /* find out the alias with MIME tag */
189             if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
190             /* find out the alias with IANA tag */
191             }else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
192             }else {
193                 /*
194                     check to see if an alias already exists with x- prefix, if yes then
195                     make that the canonical name
196                 */
197                 int aliasNum = UConverterAlias.countAliases(charsetName);
198                 String name;
199                 for(int i=0;i<aliasNum;i++){
200                     name = UConverterAlias.getAlias(charsetName, i);
201                     if(name!=null && name.indexOf("x-")==0){
202                         cName = name;
203                         break;
204                     }
205                 }
206                 /* last resort just append x- to any of the alias and
207                 make it the canonical name */
208                 if((cName==null || cName.length()==0)){
209                     name = UConverterAlias.getStandardName(charsetName, "UTR22");
210                     if(name==null && charsetName.indexOf(",")!=-1){
211                         name = UConverterAlias.getAlias(charsetName, 1);
212                     }
213                     /* if there is no UTR22 canonical name .. then just return itself*/
214                     if(name==null){
215                         name = charsetName;
216                     }
217                     cName = "x-"+ name;
218                 }
219             }
220             return cName;
221         }catch (IOException ex){
222 
223         }
224         return null;
225      }
226 
227     /**
228      * Gets the aliases associated with the converter name
229      * @param encName converter name
230      * @return converter names as elements in an object array
231      * @internal
232      * @deprecated This API is ICU internal only.
233      */
234     @Deprecated
getAliases(String encName)235     private static final String[] getAliases(String encName)throws IOException{
236         String[] ret = null;
237         int aliasNum = 0;
238         int i=0;
239         int j=0;
240         String aliasArray[/*50*/] = new String[50];
241 
242         if(encName != null){
243             aliasNum = UConverterAlias.countAliases(encName);
244             for(i=0,j=0;i<aliasNum;i++){
245                 String name = UConverterAlias.getAlias(encName,i);
246                 if(name.indexOf(',')==-1){
247                     aliasArray[j++]= name;
248                 }
249             }
250             ret = new String[j];
251             for(;--j>=0;) {
252                 ret[j] = aliasArray[j];
253             }
254 
255         }
256         return (ret);
257 
258     }
259 
260     /**
261      * Lazy-init the icuCharsets list.
262      * Could be done during static initialization if constructing all of the Charsets
263      * were cheap enough. See ICU ticket #11481.
264      */
loadAvailableICUCharsets()265     private static final synchronized void loadAvailableICUCharsets() {
266         if (!icuCharsets.isEmpty()) {
267             return;
268         }
269         List<Charset> icucs = new LinkedList<Charset>();
270         int num = UConverterAlias.countAvailable();
271         for (int i = 0; i < num; ++i) {
272             String name = UConverterAlias.getAvailableName(i);
273             try {
274                 Charset cs = getCharset(name, "");
275                 icucs.add(cs);
276             } catch(UnsupportedCharsetException ex) {
277             } catch(IOException e) {
278             }
279             // add only charsets that can be created!
280         }
281         // Unmodifiable so that charsets().next().remove() cannot change it.
282         icuCharsets = Collections.unmodifiableList(icucs);
283     }
284 
285     /**
286      * Returns an iterator for the available ICU Charsets.
287      * Implements the abstract method of super class.
288      * @return the Charset iterator
289      * @stable ICU 3.6
290      */
charsets()291     public final Iterator<Charset> charsets() {
292         loadAvailableICUCharsets();
293         return icuCharsets.iterator();
294     }
295 
296     /**
297      * Gets the canonical names of available ICU converters
298      * @return array of available converter names
299      * @internal
300      * @deprecated This API is ICU internal only.
301      */
302     @Deprecated
getAvailableNames()303      public static final String[] getAvailableNames() {
304         loadAvailableICUCharsets();
305         String[] names = new String[icuCharsets.size()];
306         int i = 0;
307         for (Charset cs : icuCharsets) {
308             names[i++] = cs.name();
309         }
310         return names;
311     }
312 
313     /**
314      * Return all names available
315      * @return String[] an array of all available names
316      * @internal
317      * @deprecated This API is ICU internal only.
318      */
319     @Deprecated
getAllNames()320      public static final String[] getAllNames(){
321         int num = UConverterAlias.countAvailable();
322         String[] names = new String[num];
323         for(int i=0;i<num;i++) {
324             names[i] = UConverterAlias.getAvailableName(i);
325         }
326         return names;
327     }
328 }
329