package org.unicode.cldr.util;

import com.ibm.icu.impl.Relation;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses and holds the subtags of a Unicode locale extension, i.e. the portion of a locale
 * identifier that follows {@code -u-}.
 *
 * <p>The parsed result consists of a set of attributes (subtags longer than two characters that
 * appear before the first key) and a sorted map from each two-character key to the list of
 * subtypes that follow it.
 *
 * <p>An instance is meant to be parsed once: a successful {@link #parse(String)} replaces the
 * internal collections with unmodifiable views, so a later {@code parse} call that tries to add a
 * key or an attribute fails with {@link UnsupportedOperationException}.
 */
public class UExtension {
    static SupplementalDataInfo data =
            SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);

    /** Subtag separator: a hyphen or an underscore. */
    static final Pattern SEP = PatternCache.get("[-_]");

    /** A single whitespace character. Not referenced inside this class. */
    static final Pattern SPACE = PatternCache.get("\\s");

    /** Shape every subtag must have: 2 to 8 ASCII letters or digits. */
    static final Pattern ALPHANUM = PatternCache.get("[0-9A-Za-z]{2,8}");

    /**
     * Shape required of a {@code vt} subtype when validating: one or more whitespace-separated
     * groups of four hex digits, each optionally preceded by {@code 10} or a single hex digit.
     */
    static final Pattern CODEPOINTS =
            PatternCache.get(
                    "(10|[0-9A-Fa-f])?[0-9A-Fa-f]{4}(\\s(10|[0-9A-Fa-f])?[0-9A-Fa-f]{4})*");

    /** Lookup used in validating mode: key to the subtypes accepted for that key. */
    static Relation<String, String> validKeyTypes = data.getBcp47Keys();

    private boolean validating = false;
    private SortedMap<String, List<String>> keyTypes = new TreeMap<>();
    private Set<String> attributes = new TreeSet<>();

    /**
     * Returns the keys that have been parsed, in sorted order.
     *
     * @return the key set of the internal key-to-subtypes map
     */
    public Set<String> getKeys() {
        return keyTypes.keySet();
    }

    /**
     * Returns the subtypes recorded for a key.
     *
     * @param key a lowercase two-character key
     * @return the subtypes in the order they were parsed, or {@code null} if the key was not seen
     */
    public List<String> getTypes(String key) {
        return keyTypes.get(key);
    }

    /**
     * Returns the attributes that have been parsed.
     *
     * @return the attribute set; unmodifiable once {@link #parse(String)} has completed
     */
    public Set<String> getAttributes() {
        return attributes;
    }

    /**
     * Tells whether {@link #parse(String)} checks keys and subtypes against the known valid
     * values.
     *
     * @return {@code true} if validation is enabled
     */
    public boolean isValidating() {
        return validating;
    }

    /**
     * Enables or disables validation for subsequent {@link #parse(String)} calls.
     *
     * @param validating {@code true} to reject unknown keys, unknown subtypes and all attributes
     * @return this object, for chaining
     */
    public UExtension setValidating(boolean validating) {
        this.validating = validating;
        return this;
    }

    /**
     * Parses the subtags after the -u-
     *
     * <p>The input is split on {@code -} or {@code _}; every subtag must consist of 2 to 8 ASCII
     * alphanumerics and is lowercased before use. Subtags longer than two characters that precede
     * the first key are recorded as attributes. A two-character subtag starts a new key, and the
     * longer subtags after it are that key's subtypes.
     *
     * <p>When validating, attributes are rejected, every key must be known to {@code
     * validKeyTypes}, and every subtype must either be listed for its key or, for the {@code vt}
     * key, match {@code CODEPOINTS}.
     *
     * <p>On success the attribute set, the key map and each subtype list become unmodifiable.
     *
     * @param source the subtags following {@code -u-}, separated by {@code -} or {@code _}
     * @return this object, for chaining
     * @throws IllegalArgumentException if a subtag is malformed, a key is repeated, a key that is
     *     followed by another key has no subtypes (or more than one, unless the key is {@code
     *     vt}), or validation is enabled and a key, subtype or attribute is not accepted
     */
    public UExtension parse(String source) {
        String key = null;
        List<String> list = null;
        Set<String> validSubtypes = null;
        Matcher alphanum = ALPHANUM.matcher("");

        for (String subtag : SEP.split(source)) {
            if (!alphanum.reset(subtag).matches()) {
                throw new IllegalArgumentException(
                        "Invalid subtag contents, must be [0-9 A-Z a-z]{2,8}: " + subtag);
            }
            subtag = subtag.toLowerCase(Locale.ENGLISH); // normalize
            if (subtag.length() == 2) { // key
                if (list != null) { // check size of previous list
                    // Every key needs at least one subtype; only "vt" may carry several.
                    // NOTE(review): this check runs only when another key follows, so the
                    // final key in the input is never checked. Confirm whether that is intended.
                    if (list.isEmpty() || (!key.equals("vt") && list.size() > 1)) {
                        throw new IllegalArgumentException(
                                "Illegal number of subtypes for: " + key + "\t" + list);
                    }
                }
                key = subtag;
                if (validating) {
                    validSubtypes = validKeyTypes.getAll(key);
                    if (validSubtypes == null) {
                        throw new IllegalArgumentException("Invalid key: " + key);
                    }
                }
                list = keyTypes.get(key);
                if (list != null) {
                    throw new IllegalArgumentException("Multiple keys with same value: " + subtag);
                }
                list = new ArrayList<>();
                keyTypes.put(key, list);
            } else if (key == null) { // attribute: longer subtag before any key
                if (validating) {
                    throw new IllegalArgumentException("No attributes currently valid: " + subtag);
                }
                attributes.add(subtag);
                // Keep parsing: further attributes and the keywords may still follow.
                // (Leaving the loop here would silently discard the rest of the input.)
                continue;
            } else { // add subtype
                if (validating) {
                    if (key.equals("vt")) {
                        if (!CODEPOINTS.matcher(subtag).matches()) {
                            throw new IllegalArgumentException(
                                    "Illegal subtypes: " + key + "-" + subtag);
                        }
                    } else if (!validSubtypes.contains(subtag)) {
                        throw new IllegalArgumentException(
                                "Illegal subtypes: " + key + "-" + subtag);
                    }
                }
                list.add(subtag);
            }
        }
        // protect
        attributes = Collections.unmodifiableSet(attributes);
        // Replacing the value of an existing key is not a structural change, so it is safe to do
        // while iterating over the key set.
        for (String key2 : keyTypes.keySet()) {
            list = keyTypes.get(key2);
            keyTypes.put(key2, Collections.unmodifiableList(list));
        }
        keyTypes = Collections.unmodifiableSortedMap(keyTypes);
        return this;
    }

    @Override
    public String toString() {
        return "{attributes=" + attributes + ", keyTypes=" + keyTypes + "}";
    }
}