• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.EnumSet;
14 import java.util.Iterator;
15 import java.util.List;
16 import java.util.Set;
17 import java.util.TreeSet;
18 
19 import com.ibm.icu.impl.Utility;
20 import com.ibm.icu.text.UnicodeSet;
21 
22 public class LocaleIDParser {
23     /**
24      * @return Returns the language.
25      */
getLanguage()26     public String getLanguage() {
27         return language;
28     }
29 
30     /**
31      * @return Returns the language.
32      */
getLanguageScript()33     public String getLanguageScript() {
34         if (script.length() != 0) return language + "_" + script;
35         return language;
36     }
37 
getLanguageScript(Collection<String> in)38     public static Set<String> getLanguageScript(Collection<String> in) {
39         return getLanguageScript(in, null);
40     }
41 
getLanguageScript(Collection<String> in, Set<String> output)42     public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
43         if (output == null) output = new TreeSet<>();
44         LocaleIDParser lparser = new LocaleIDParser();
45         for (Iterator<String> it = in.iterator(); it.hasNext();) {
46             output.add(lparser.set(it.next()).getLanguageScript());
47         }
48         return output;
49     }
50 
51     /**
52      * @return Returns the region.
53      */
getRegion()54     public String getRegion() {
55         return region;
56     }
57 
58     /**
59      * @return Returns the script.
60      */
getScript()61     public String getScript() {
62         return script;
63     }
64 
65     /**
66      * @return Returns the variants.
67      */
getVariants()68     public String[] getVariants() {
69         return variants.clone();
70     }
71 
72     // TODO, update to RFC3066
73     // http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
74     private String language;
75     private String script;
76     private String region;
77     private String[] variants;
78 
79     static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
80     static final UnicodeSet digits = new UnicodeSet("[0-9]");
81 
set(String localeID)82     public LocaleIDParser set(String localeID) {
83         region = script = "";
84         variants = new String[0];
85 
86         String[] pieces = new String[100]; // fix limitation later
87         Utility.split(localeID, '_', pieces);
88         int i = 0;
89         language = pieces[i++];
90         if (i >= pieces.length) return this;
91         if (pieces[i].length() == 4) {
92             script = pieces[i++];
93             if (i >= pieces.length) return this;
94         }
95         if (pieces[i].length() == 2 && letters.containsAll(pieces[i])
96             || pieces[i].length() == 3 && digits.containsAll(pieces[i])) {
97             region = pieces[i++];
98             if (i >= pieces.length) return this;
99         }
100         List<String> al = new ArrayList<>();
101         while (i < pieces.length && pieces[i].length() > 0) {
102             al.add(pieces[i++]);
103         }
104         variants = new String[al.size()];
105         al.toArray(variants);
106         return this;
107     }
108 
109     /**
110      * Get the parent of a locale. If the input is "root", then return null.
111      * For example, if localeName is "fr_CA", return "fr".
112      *
113      * Only works on canonical locale names (right casing, etc.)!
114      *
115      * Formerly this function returned an empty string when localeName was "_VETTING".
116      * Now it returns "root" where it would have returned an empty string.
117      * TODO: explain "__VETTING", somehow related to SUMMARY_LOCALE. Note that
118      * CLDRLocale.process() changes "__" to "_" before this function is called.
119      * Reference: https://unicode-org.atlassian.net/browse/CLDR-13133
120      */
getParent(String localeName)121     public static final String getParent(String localeName) {
122         return getParent(localeName, false);
123     }
124 
125     /**
126      * Get the parent of a locale. If the input is "root", then return null.
127      * For example, if localeName is "fr_CA", return "fr".
128      *
129      * Only works on canonical locale names (right casing, etc.)!
130      *
131      * Formerly this function returned an empty string when localeName was "_VETTING".
132      * Now it returns "root" where it would have returned an empty string.
133      * TODO: explain "__VETTING", somehow related to SUMMARY_LOCALE. Note that
134      * CLDRLocale.process() changes "__" to "_" before this function is called.
135      * Reference: https://unicode-org.atlassian.net/browse/CLDR-13133
136      * @param ignoreParentLocale true of the parentLocale and default script behavior should be ignored (such as with collation)
137      */
getParent(String localeName, boolean ignoreParentLocale)138     public static String getParent(String localeName, boolean ignoreParentLocale) {
139         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
140         if (!ignoreParentLocale) {
141             String explicitParent = sdi.getExplicitParentLocale(localeName);
142             if (explicitParent != null) {
143                 return explicitParent;
144             }
145         }
146         int pos = localeName.lastIndexOf('_');
147         if (pos >= 0) {
148             String truncated = localeName.substring(0, pos);
149             // if the final item is a script, and it is not the default content, then go directly to root
150             int pos2 = getScriptPosition(localeName);
151             if (pos2 > 0 && !ignoreParentLocale) {
152                 String script = localeName.substring(pos + 1);
153                 String defaultScript = sdi.getDefaultScript(truncated);
154                 if (!script.equals(defaultScript)) {
155                     return "root";
156                 }
157             }
158             if (truncated.length() == 0) {
159                 return "root";
160             }
161             return truncated;
162         }
163         if (localeName.equals("root")) {
164             return null;
165         }
166         return "root";
167     }
168 
169     /**
170      * Return the base language subtag: en_US => en, en_Latn_US => en, en => en, root => root
171      * @param localeID
172      * @return
173      */
getSimpleBaseLanguage(String localeID)174     public static String getSimpleBaseLanguage(String localeID) {
175         int pos = localeID.indexOf('_');
176         if (pos >= 0) {
177             return localeID.substring(0,pos);
178         }
179         return localeID;
180     }
181 
182     /**
183      * If the locale consists of baseLanguage+script, return the position of the separator, otherwise -1.
184      * @param s
185      */
getScriptPosition(String locale)186     public static int getScriptPosition(String locale) {
187         int pos = locale.indexOf('_');
188         if (pos >= 0 && pos + 5 == locale.length()) {
189             int pos2 = locale.indexOf('_', pos + 1);
190             if (pos2 < 0) {
191                 return pos;
192             }
193         }
194         return -1;
195     }
196 
197     /**
198      * Utility to get the simple parent of a locale. If the input is "root", then the output is null.
199      * This method is similar to the getParent() method above, except that it does NOT pay any attention
200      * to the explicit parent locales information. Thus, getParent("zh_Hant") will return "root",
201      * but getSimpleParent("zh_Hant") would return "zh".
202      */
getSimpleParent(String localeName)203     public static String getSimpleParent(String localeName) {
204         int pos = localeName.lastIndexOf('_');
205         if (pos >= 0) {
206             return localeName.substring(0, pos);
207         }
208         if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null;
209         return "root";
210     }
211 
setLanguage(String language)212     public LocaleIDParser setLanguage(String language) {
213         this.language = language;
214         return this;
215     }
216 
setRegion(String region)217     public LocaleIDParser setRegion(String region) {
218         this.region = region;
219         return this;
220     }
221 
setScript(String script)222     public LocaleIDParser setScript(String script) {
223         this.script = script;
224         return this;
225     }
226 
setVariants(String[] variants)227     public LocaleIDParser setVariants(String[] variants) {
228         this.variants = variants.clone();
229         return this;
230     }
231 
232     public enum Level {
233         Language, Script, Region, Variants, Other
234     }
235 
236     /**
237      * Returns an int mask indicating the level
238      *
239      * @return (2 if script is present) + (4 if region is present) + (8 if region is present)
240      */
getLevels()241     public Set<Level> getLevels() {
242         EnumSet<Level> result = EnumSet.of(Level.Language);
243         if (getScript().length() != 0) result.add(Level.Script);
244         if (getRegion().length() != 0) result.add(Level.Region);
245         if (getVariants().length != 0) result.add(Level.Variants);
246         return result;
247     }
248 
getSiblings(Set<String> set)249     public Set<String> getSiblings(Set<String> set) {
250         Set<Level> myLevel = getLevels();
251         String localeID = toString();
252         String parentID = getParent(localeID);
253 
254         String prefix = (parentID == null || "root".equals(parentID)) ? "" : parentID + "_";
255         Set<String> siblings = new TreeSet<>();
256         for (String id : set) {
257             if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
258                 siblings.add(id);
259             }
260         }
261         set(localeID); // leave in known state
262         return siblings;
263     }
264 
265     @Override
toString()266     public String toString() {
267         StringBuffer result = new StringBuffer(language);
268         if (script.length() != 0) result.append('_').append(script);
269         if (region.length() != 0) result.append('_').append(region);
270         if (variants != null) {
271             for (int i = 0; i < variants.length; ++i) {
272                 result.append('_').append(variants[i]);
273             }
274         }
275         return result.toString();
276     }
277 }
278