• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 ******************************************************************************
6 * Copyright (C) 2003-2011, International Business Machines Corporation and   *
7 * others. All Rights Reserved.                                               *
8 ******************************************************************************
9 */
10 
11 package ohos.global.icu.impl;
12 
13 import java.util.Collections;
14 import java.util.Comparator;
15 import java.util.Iterator;
16 import java.util.Map;
17 import java.util.TreeMap;
18 
19 import ohos.global.icu.impl.locale.AsciiUtil;
20 
21 /**
22  * Utility class to parse and normalize locale ids (including POSIX style)
23  * @hide exposed on OHOS
24  */
25 public final class LocaleIDParser {
26 
27     /**
28      * Char array representing the locale ID.
29      */
30     private char[] id;
31 
32     /**
33      * Current position in {@link #id} (while parsing).
34      */
35     private int index;
36 
37     /**
38      * Temporary buffer for parsed sections of data.
39      */
40     private StringBuilder buffer;
41 
42     // um, don't handle POSIX ids unless we request it.  why not?  well... because.
43     private boolean canonicalize;
44     private boolean hadCountry;
45 
46     // used when canonicalizing
47     Map<String, String> keywords;
48     String baseName;
49 
50     /**
51      * Parsing constants.
52      */
53     private static final char KEYWORD_SEPARATOR     = '@';
54     private static final char HYPHEN                = '-';
55     private static final char KEYWORD_ASSIGN        = '=';
56     private static final char COMMA                 = ',';
57     private static final char ITEM_SEPARATOR        = ';';
58     private static final char DOT                   = '.';
59     private static final char UNDERSCORE            = '_';
60 
LocaleIDParser(String localeID)61     public LocaleIDParser(String localeID) {
62         this(localeID, false);
63     }
64 
LocaleIDParser(String localeID, boolean canonicalize)65     public LocaleIDParser(String localeID, boolean canonicalize) {
66         id = localeID.toCharArray();
67         index = 0;
68         buffer = new StringBuilder(id.length + 5);
69         this.canonicalize = canonicalize;
70     }
71 
reset()72     private void reset() {
73         index = 0;
74         buffer = new StringBuilder(id.length + 5);
75     }
76 
77     // utilities for working on text in the buffer
78 
79     /**
80      * Append c to the buffer.
81      */
append(char c)82     private void append(char c) {
83         buffer.append(c);
84     }
85 
addSeparator()86     private void addSeparator() {
87         append(UNDERSCORE);
88     }
89 
90     /**
91      * Returns the text in the buffer from start to blen as a String.
92      */
getString(int start)93     private String getString(int start) {
94         return buffer.substring(start);
95     }
96 
97     /**
98      * Set the length of the buffer to pos, then append the string.
99      */
set(int pos, String s)100     private void set(int pos, String s) {
101         buffer.delete(pos, buffer.length());
102         buffer.insert(pos, s);
103     }
104 
105     /**
106      * Append the string to the buffer.
107      */
append(String s)108     private void append(String s) {
109         buffer.append(s);
110     }
111 
112     // utilities for parsing text out of the id
113 
114     /**
115      * Character to indicate no more text is available in the id.
116      */
117     private static final char DONE = '\uffff';
118 
119     /**
120      * Returns the character at index in the id, and advance index.  The returned character
121      * is DONE if index was at the limit of the buffer.  The index is advanced regardless
122      * so that decrementing the index will always 'unget' the last character returned.
123      */
next()124     private char next() {
125         if (index == id.length) {
126             index++;
127             return DONE;
128         }
129 
130         return id[index++];
131     }
132 
133     /**
134      * Advance index until the next terminator or id separator, and leave it there.
135      */
skipUntilTerminatorOrIDSeparator()136     private void skipUntilTerminatorOrIDSeparator() {
137         while (!isTerminatorOrIDSeparator(next()));
138         --index;
139     }
140 
141     /**
142      * Returns true if the character at index in the id is a terminator.
143      */
atTerminator()144     private boolean atTerminator() {
145         return index >= id.length || isTerminator(id[index]);
146     }
147 
148     /**
149      * Returns true if the character is a terminator (keyword separator, dot, or DONE).
150      * Dot is a terminator because of the POSIX form, where dot precedes the codepage.
151      */
isTerminator(char c)152     private boolean isTerminator(char c) {
153         // always terminate at DOT, even if not handling POSIX.  It's an error...
154         return c == KEYWORD_SEPARATOR || c == DONE || c == DOT;
155     }
156 
157     /**
158      * Returns true if the character is a terminator or id separator.
159      */
isTerminatorOrIDSeparator(char c)160     private boolean isTerminatorOrIDSeparator(char c) {
161         return c == UNDERSCORE || c == HYPHEN || isTerminator(c);
162     }
163 
164     /**
165      * Returns true if the start of the buffer has an experimental or private language
166      * prefix, the pattern '[ixIX][-_].' shows the syntax checked.
167      */
haveExperimentalLanguagePrefix()168     private boolean haveExperimentalLanguagePrefix() {
169         if (id.length > 2) {
170             char c = id[1];
171             if (c == HYPHEN || c == UNDERSCORE) {
172                 c = id[0];
173                 return c == 'x' || c == 'X' || c == 'i' || c == 'I';
174             }
175         }
176         return false;
177     }
178 
179     /**
180      * Returns true if a value separator occurs at or after index.
181      */
haveKeywordAssign()182     private boolean haveKeywordAssign() {
183         // assume it is safe to start from index
184         for (int i = index; i < id.length; ++i) {
185             if (id[i] == KEYWORD_ASSIGN) {
186                 return true;
187             }
188         }
189         return false;
190     }
191 
192     /**
193      * Advance index past language, and accumulate normalized language code in buffer.
194      * Index must be at 0 when this is called.  Index is left at a terminator or id
195      * separator.  Returns the start of the language code in the buffer.
196      */
parseLanguage()197     private int parseLanguage() {
198         int startLength = buffer.length();
199 
200         if (haveExperimentalLanguagePrefix()) {
201             append(AsciiUtil.toLower(id[0]));
202             append(HYPHEN);
203             index = 2;
204         }
205 
206         char c;
207         while(!isTerminatorOrIDSeparator(c = next())) {
208             append(AsciiUtil.toLower(c));
209         }
210         --index; // unget
211 
212         if (buffer.length() - startLength == 3) {
213             String lang = LocaleIDs.threeToTwoLetterLanguage(getString(0));
214             if (lang != null) {
215                 set(0, lang);
216             }
217         }
218 
219         return 0;
220     }
221 
222     /**
223      * Advance index past language.  Index must be at 0 when this is called.  Index
224      * is left at a terminator or id separator.
225      */
skipLanguage()226     private void skipLanguage() {
227         if (haveExperimentalLanguagePrefix()) {
228             index = 2;
229         }
230         skipUntilTerminatorOrIDSeparator();
231     }
232 
233     /**
234      * Advance index past script, and accumulate normalized script in buffer.
235      * Index must be immediately after the language.
236      * If the item at this position is not a script (is not four characters
237      * long) leave index and buffer unchanged.  Otherwise index is left at
238      * a terminator or id separator.  Returns the start of the script code
239      * in the buffer (this may be equal to the buffer length, if there is no
240      * script).
241      */
parseScript()242     private int parseScript() {
243         if (!atTerminator()) {
244             int oldIndex = index; // save original index
245             ++index;
246 
247             int oldBlen = buffer.length(); // get before append hyphen, if we truncate everything is undone
248             char c;
249             boolean firstPass = true;
250             while(!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c)) {
251                 if (firstPass) {
252                     addSeparator();
253                     append(AsciiUtil.toUpper(c));
254                     firstPass = false;
255                 } else {
256                     append(AsciiUtil.toLower(c));
257                 }
258             }
259             --index; // unget
260 
261             /* If it's not exactly 4 characters long, then it's not a script. */
262             if (index - oldIndex != 5) { // +1 to account for separator
263                 index = oldIndex;
264                 buffer.delete(oldBlen, buffer.length());
265             } else {
266                 oldBlen++; // index past hyphen, for clients who want to extract just the script
267             }
268 
269             return oldBlen;
270         }
271         return buffer.length();
272     }
273 
274     /**
275      * Advance index past script.
276      * Index must be immediately after the language and IDSeparator.
277      * If the item at this position is not a script (is not four characters
278      * long) leave index.  Otherwise index is left at a terminator or
279      * id separator.
280      */
skipScript()281     private void skipScript() {
282         if (!atTerminator()) {
283             int oldIndex = index;
284             ++index;
285 
286             char c;
287             while (!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c));
288             --index;
289 
290             if (index - oldIndex != 5) { // +1 to account for separator
291                 index = oldIndex;
292             }
293         }
294     }
295 
296     /**
297      * Advance index past country, and accumulate normalized country in buffer.
298      * Index must be immediately after the script (if there is one, else language)
299      * and IDSeparator.  Return the start of the country code in the buffer.
300      */
parseCountry()301     private int parseCountry() {
302         if (!atTerminator()) {
303             int oldIndex = index;
304             ++index;
305 
306             int oldBlen = buffer.length();
307             char c;
308             boolean firstPass = true;
309             while (!isTerminatorOrIDSeparator(c = next())) {
310                 if (firstPass) { // first, add hyphen
311                     hadCountry = true; // we have a country, let variant parsing know
312                     addSeparator();
313                     ++oldBlen; // increment past hyphen
314                     firstPass = false;
315                 }
316                 append(AsciiUtil.toUpper(c));
317             }
318             --index; // unget
319 
320             int charsAppended = buffer.length() - oldBlen;
321 
322             if (charsAppended == 0) {
323                 // Do nothing.
324             }
325             else if (charsAppended < 2 || charsAppended > 3) {
326                 // It's not a country, so return index and blen to
327                 // their previous values.
328                 index = oldIndex;
329                 --oldBlen;
330                 buffer.delete(oldBlen, buffer.length());
331                 hadCountry = false;
332             }
333             else if (charsAppended == 3) {
334                 String region = LocaleIDs.threeToTwoLetterRegion(getString(oldBlen));
335                 if (region != null) {
336                     set(oldBlen, region);
337                 }
338             }
339 
340             return oldBlen;
341         }
342 
343         return buffer.length();
344     }
345 
346     /**
347      * Advance index past country.
348      * Index must be immediately after the script (if there is one, else language)
349      * and IDSeparator.
350      */
skipCountry()351     private void skipCountry() {
352         if (!atTerminator()) {
353             if (id[index] == UNDERSCORE || id[index] == HYPHEN) {
354                 ++index;
355             }
356             /*
357              * Save the index point after the separator, since the format
358              * requires two separators if the country is not present.
359              */
360             int oldIndex = index;
361 
362             skipUntilTerminatorOrIDSeparator();
363             int charsSkipped = index - oldIndex;
364             if (charsSkipped < 2 || charsSkipped > 3) {
365                 index = oldIndex;
366             }
367         }
368     }
369 
370     /**
371      * Advance index past variant, and accumulate normalized variant in buffer.  This ignores
372      * the codepage information from POSIX ids.  Index must be immediately after the country
373      * or script.  Index is left at the keyword separator or at the end of the text.  Return
374      * the start of the variant code in the buffer.
375      *
376      * In standard form, we can have the following forms:
377      * ll__VVVV
378      * ll_CC_VVVV
379      * ll_Ssss_VVVV
380      * ll_Ssss_CC_VVVV
381      *
382      * This also handles POSIX ids, which can have the following forms (pppp is code page id):
383      * ll_CC.pppp          --> ll_CC
384      * ll_CC.pppp@VVVV     --> ll_CC_VVVV
385      * ll_CC@VVVV          --> ll_CC_VVVV
386      *
387      * We identify this use of '@' in POSIX ids by looking for an '=' following
388      * the '@'.  If there is one, we consider '@' to start a keyword list, instead of
389      * being part of a POSIX id.
390      *
391      * Note:  since it was decided that we want an option to not handle POSIX ids, this
392      * becomes a bit more complex.
393      */
parseVariant()394     private int parseVariant() {
395         int oldBlen = buffer.length();
396 
397         boolean start = true;
398         boolean needSeparator = true;
399         boolean skipping = false;
400         char c;
401         boolean firstPass = true;
402 
403         while ((c = next()) != DONE) {
404             if (c == DOT) {
405                 start = false;
406                 skipping = true;
407             } else if (c == KEYWORD_SEPARATOR) {
408                 if (haveKeywordAssign()) {
409                     break;
410                 }
411                 skipping = false;
412                 start = false;
413                 needSeparator = true; // add another underscore if we have more text
414             } else if (start) {
415                 start = false;
416                 if (c != UNDERSCORE && c != HYPHEN) {
417                     index--;
418                 }
419             } else if (!skipping) {
420                 if (needSeparator) {
421                     needSeparator = false;
422                     if (firstPass && !hadCountry) { // no country, we'll need two
423                         addSeparator();
424                         ++oldBlen; // for sure
425                     }
426                     addSeparator();
427                     if (firstPass) { // only for the first separator
428                         ++oldBlen;
429                         firstPass = false;
430                     }
431                 }
432                 c = AsciiUtil.toUpper(c);
433                 if (c == HYPHEN || c == COMMA) {
434                     c = UNDERSCORE;
435                 }
436                 append(c);
437             }
438         }
439         --index; // unget
440 
441         return oldBlen;
442     }
443 
444     // no need for skipvariant, to get the keywords we'll just scan directly for
445     // the keyword separator
446 
447     /**
448      * Returns the normalized language id, or the empty string.
449      */
getLanguage()450     public String getLanguage() {
451         reset();
452         return getString(parseLanguage());
453     }
454 
455     /**
456      * Returns the normalized script id, or the empty string.
457      */
getScript()458     public String getScript() {
459         reset();
460         skipLanguage();
461         return getString(parseScript());
462     }
463 
464     /**
465      * return the normalized country id, or the empty string.
466      */
getCountry()467     public String getCountry() {
468         reset();
469         skipLanguage();
470         skipScript();
471         return getString(parseCountry());
472     }
473 
474     /**
475      * Returns the normalized variant id, or the empty string.
476      */
getVariant()477     public String getVariant() {
478         reset();
479         skipLanguage();
480         skipScript();
481         skipCountry();
482         return getString(parseVariant());
483     }
484 
485     /**
486      * Returns the language, script, country, and variant as separate strings.
487      */
getLanguageScriptCountryVariant()488     public String[] getLanguageScriptCountryVariant() {
489         reset();
490         return new String[] {
491                 getString(parseLanguage()),
492                 getString(parseScript()),
493                 getString(parseCountry()),
494                 getString(parseVariant())
495         };
496     }
497 
setBaseName(String baseName)498     public void setBaseName(String baseName) {
499         this.baseName = baseName;
500     }
501 
parseBaseName()502     public void parseBaseName() {
503         if (baseName != null) {
504             set(0, baseName);
505         } else {
506             reset();
507             parseLanguage();
508             parseScript();
509             parseCountry();
510             parseVariant();
511 
512             // catch unwanted trailing underscore after country if there was no variant
513             int len = buffer.length();
514             if (len > 0 && buffer.charAt(len - 1) == UNDERSCORE) {
515                 buffer.deleteCharAt(len - 1);
516             }
517         }
518     }
519 
520     /**
521      * Returns the normalized base form of the locale id.  The base
522      * form does not include keywords.
523      */
getBaseName()524     public String getBaseName() {
525         if (baseName != null) {
526             return baseName;
527         }
528         parseBaseName();
529         return getString(0);
530     }
531 
532     /**
533      * Returns the normalized full form of the locale id.  The full
534      * form includes keywords if they are present.
535      */
getName()536     public String getName() {
537         parseBaseName();
538         parseKeywords();
539         return getString(0);
540     }
541 
542     // keyword utilities
543 
544     /**
545      * If we have keywords, advance index to the start of the keywords and return true,
546      * otherwise return false.
547      */
setToKeywordStart()548     private boolean setToKeywordStart() {
549         for (int i = index; i < id.length; ++i) {
550             if (id[i] == KEYWORD_SEPARATOR) {
551                 if (canonicalize) {
552                     for (int j = ++i; j < id.length; ++j) { // increment i past separator for return
553                         if (id[j] == KEYWORD_ASSIGN) {
554                             index = i;
555                             return true;
556                         }
557                     }
558                 } else {
559                     if (++i < id.length) {
560                         index = i;
561                         return true;
562                     }
563                 }
564                 break;
565             }
566         }
567         return false;
568     }
569 
isDoneOrKeywordAssign(char c)570     private static boolean isDoneOrKeywordAssign(char c) {
571         return c == DONE || c == KEYWORD_ASSIGN;
572     }
573 
isDoneOrItemSeparator(char c)574     private static boolean isDoneOrItemSeparator(char c) {
575         return c == DONE || c == ITEM_SEPARATOR;
576     }
577 
getKeyword()578     private String getKeyword() {
579         int start = index;
580         while (!isDoneOrKeywordAssign(next())) {
581         }
582         --index;
583         return AsciiUtil.toLowerString(new String(id, start, index-start).trim());
584     }
585 
getValue()586     private String getValue() {
587         int start = index;
588         while (!isDoneOrItemSeparator(next())) {
589         }
590         --index;
591         return new String(id, start, index-start).trim(); // leave case alone
592     }
593 
getKeyComparator()594     private Comparator<String> getKeyComparator() {
595         final Comparator<String> comp = new Comparator<String>() {
596             @Override
597             public int compare(String lhs, String rhs) {
598                 return lhs.compareTo(rhs);
599             }
600         };
601         return comp;
602     }
603 
604     /**
605      * Returns a map of the keywords and values, or null if there are none.
606      */
getKeywordMap()607     public Map<String, String> getKeywordMap() {
608         if (keywords == null) {
609             TreeMap<String, String> m = null;
610             if (setToKeywordStart()) {
611                 // trim spaces and convert to lower case, both keywords and values.
612                 do {
613                     String key = getKeyword();
614                     if (key.length() == 0) {
615                         break;
616                     }
617                     char c = next();
618                     if (c != KEYWORD_ASSIGN) {
619                         // throw new IllegalArgumentException("key '" + key + "' missing a value.");
620                         if (c == DONE) {
621                             break;
622                         } else {
623                             continue;
624                         }
625                     }
626                     String value = getValue();
627                     if (value.length() == 0) {
628                         // throw new IllegalArgumentException("key '" + key + "' missing a value.");
629                         continue;
630                     }
631                     if (m == null) {
632                         m = new TreeMap<String, String>(getKeyComparator());
633                     } else if (m.containsKey(key)) {
634                         // throw new IllegalArgumentException("key '" + key + "' already has a value.");
635                         continue;
636                     }
637                     m.put(key, value);
638                 } while (next() == ITEM_SEPARATOR);
639             }
640             keywords = m != null ? m : Collections.<String, String>emptyMap();
641         }
642 
643         return keywords;
644     }
645 
646 
647     /**
648      * Parse the keywords and return start of the string in the buffer.
649      */
parseKeywords()650     private int parseKeywords() {
651         int oldBlen = buffer.length();
652         Map<String, String> m = getKeywordMap();
653         if (!m.isEmpty()) {
654             boolean first = true;
655             for (Map.Entry<String, String> e : m.entrySet()) {
656                 append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR);
657                 first = false;
658                 append(e.getKey());
659                 append(KEYWORD_ASSIGN);
660                 append(e.getValue());
661             }
662             if (first == false) {
663                 ++oldBlen;
664             }
665         }
666         return oldBlen;
667     }
668 
669     /**
670      * Returns an iterator over the keywords, or null if we have an empty map.
671      */
getKeywords()672     public Iterator<String> getKeywords() {
673         Map<String, String> m = getKeywordMap();
674         return m.isEmpty() ? null : m.keySet().iterator();
675     }
676 
677     /**
678      * Returns the value for the named keyword, or null if the keyword is not
679      * present.
680      */
getKeywordValue(String keywordName)681     public String getKeywordValue(String keywordName) {
682         Map<String, String> m = getKeywordMap();
683         return m.isEmpty() ? null : m.get(AsciiUtil.toLowerString(keywordName.trim()));
684     }
685 
686     /**
687      * Set the keyword value only if it is not already set to something else.
688      */
defaultKeywordValue(String keywordName, String value)689     public void defaultKeywordValue(String keywordName, String value) {
690         setKeywordValue(keywordName, value, false);
691     }
692 
693     /**
694      * Set the value for the named keyword, or unset it if value is null.  If
695      * keywordName itself is null, unset all keywords.  If keywordName is not null,
696      * value must not be null.
697      */
setKeywordValue(String keywordName, String value)698     public void setKeywordValue(String keywordName, String value) {
699         setKeywordValue(keywordName, value, true);
700     }
701 
702     /**
703      * Set the value for the named keyword, or unset it if value is null.  If
704      * keywordName itself is null, unset all keywords.  If keywordName is not null,
705      * value must not be null.  If reset is true, ignore any previous value for
706      * the keyword, otherwise do not change the keyword (including removal of
707      * one or all keywords).
708      */
setKeywordValue(String keywordName, String value, boolean reset)709     private void setKeywordValue(String keywordName, String value, boolean reset) {
710         if (keywordName == null) {
711             if (reset) {
712                 // force new map, ignore value
713                 keywords = Collections.<String, String>emptyMap();
714             }
715         } else {
716             keywordName = AsciiUtil.toLowerString(keywordName.trim());
717             if (keywordName.length() == 0) {
718                 throw new IllegalArgumentException("keyword must not be empty");
719             }
720             if (value != null) {
721                 value = value.trim();
722                 if (value.length() == 0) {
723                     throw new IllegalArgumentException("value must not be empty");
724                 }
725             }
726             Map<String, String> m = getKeywordMap();
727             if (m.isEmpty()) { // it is EMPTY_MAP
728                 if (value != null) {
729                     // force new map
730                     keywords = new TreeMap<String, String>(getKeyComparator());
731                     keywords.put(keywordName, value.trim());
732                 }
733             } else {
734                 if (reset || !m.containsKey(keywordName)) {
735                     if (value != null) {
736                         m.put(keywordName, value);
737                     } else {
738                         m.remove(keywordName);
739                         if (m.isEmpty()) {
740                             // force new map
741                             keywords = Collections.<String, String>emptyMap();
742                         }
743                     }
744                 }
745             }
746         }
747     }
748 }
749