1 package org.unicode.cldr.util; 2 3 import java.util.Collections; 4 import java.util.HashMap; 5 import java.util.HashSet; 6 import java.util.Iterator; 7 import java.util.Map; 8 import java.util.Set; 9 import java.util.regex.Pattern; 10 11 import org.unicode.cldr.util.XPathParts.Comments; 12 13 import com.ibm.icu.impl.Relation; 14 import com.ibm.icu.text.Normalizer2; 15 import com.ibm.icu.text.UnicodeSet; 16 import com.ibm.icu.util.VersionInfo; 17 18 public class SimpleXMLSource extends XMLSource { 19 private Map<String, String> xpath_value = CldrUtility.newConcurrentHashMap(); 20 private Map<String, String> xpath_fullXPath = CldrUtility.newConcurrentHashMap(); 21 private Comments xpath_comments = new Comments(); // map from paths to comments. 22 private Relation<String, String> VALUE_TO_PATH = null; 23 private Object VALUE_TO_PATH_MUTEX = new Object(); 24 private VersionInfo dtdVersionInfo; 25 SimpleXMLSource(String localeID)26 public SimpleXMLSource(String localeID) { 27 this.setLocaleID(localeID); 28 } 29 30 /** 31 * Create a shallow, locked copy of another XMLSource. 32 * 33 * @param copyAsLockedFrom 34 */ SimpleXMLSource(SimpleXMLSource copyAsLockedFrom)35 protected SimpleXMLSource(SimpleXMLSource copyAsLockedFrom) { 36 this.xpath_value = copyAsLockedFrom.xpath_value; 37 this.xpath_fullXPath = copyAsLockedFrom.xpath_fullXPath; 38 this.xpath_comments = copyAsLockedFrom.xpath_comments; 39 this.setLocaleID(copyAsLockedFrom.getLocaleID()); 40 this.locationHash = Collections.unmodifiableMap(copyAsLockedFrom.locationHash); 41 locked = true; 42 } 43 44 @Override getValueAtDPath(String xpath)45 public String getValueAtDPath(String xpath) { 46 return xpath_value.get(xpath); 47 } 48 getValueAtDPathSkippingInheritanceMarker(String xpath)49 public String getValueAtDPathSkippingInheritanceMarker(String xpath) { 50 String result = xpath_value.get(xpath); 51 return CldrUtility.INHERITANCE_MARKER.equals(result) ? null : result; 52 } 53 54 @Override getFullPathAtDPath(String xpath)55 public String getFullPathAtDPath(String xpath) { 56 String result = xpath_fullXPath.get(xpath); 57 if (result != null) return result; 58 if (xpath_value.get(xpath) != null) return xpath; // we don't store duplicates 59 // System.err.println("WARNING: "+getLocaleID()+": path not present in data: " + xpath); 60 // return xpath; 61 return null; // throw new IllegalArgumentException("Path not present in data: " + xpath); 62 } 63 64 @Override getXpathComments()65 public Comments getXpathComments() { 66 return xpath_comments; 67 } 68 69 @Override setXpathComments(Comments xpath_comments)70 public void setXpathComments(Comments xpath_comments) { 71 this.xpath_comments = xpath_comments; 72 } 73 74 // public void putPathValue(String xpath, String value) { 75 // if (locked) throw new UnsupportedOperationException("Attempt to modify locked object"); 76 // String distinguishingXPath = CLDRFile.getDistinguishingXPath(xpath, fixedPath); 77 // xpath_value.put(distinguishingXPath, value); 78 // if (!fixedPath[0].equals(distinguishingXPath)) { 79 // xpath_fullXPath.put(distinguishingXPath, fixedPath[0]); 80 // } 81 // } 82 @Override removeValueAtDPath(String distinguishingXPath)83 public void removeValueAtDPath(String distinguishingXPath) { 84 String oldValue = xpath_value.get(distinguishingXPath); 85 xpath_value.remove(distinguishingXPath); 86 xpath_fullXPath.remove(distinguishingXPath); 87 updateValuePathMapping(distinguishingXPath, oldValue, null); 88 } 89 90 @Override iterator()91 public Iterator<String> iterator() { // must be unmodifiable or locked 92 return Collections.unmodifiableSet(xpath_value.keySet()).iterator(); 93 } 94 95 @Override freeze()96 public XMLSource freeze() { 97 locked = true; 98 return this; 99 } 100 101 @Override cloneAsThawed()102 public XMLSource cloneAsThawed() { 103 SimpleXMLSource result = (SimpleXMLSource) super.cloneAsThawed(); 104 result.xpath_comments = (Comments) result.xpath_comments.clone(); 105 result.xpath_fullXPath = CldrUtility.newConcurrentHashMap(result.xpath_fullXPath); 106 result.xpath_value = CldrUtility.newConcurrentHashMap(result.xpath_value); 107 result.locationHash.putAll(result.locationHash); 108 return result; 109 } 110 111 @Override putFullPathAtDPath(String distinguishingXPath, String fullxpath)112 public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) { 113 xpath_fullXPath.put(distinguishingXPath, fullxpath); 114 } 115 116 @Override putValueAtDPath(String distinguishingXPath, String value)117 public void putValueAtDPath(String distinguishingXPath, String value) { 118 String oldValue = xpath_value.get(distinguishingXPath); 119 xpath_value.put(distinguishingXPath, value); 120 updateValuePathMapping(distinguishingXPath, oldValue, value); 121 } 122 updateValuePathMapping(String distinguishingXPath, String oldValue, String newValue)123 private void updateValuePathMapping(String distinguishingXPath, String oldValue, String newValue) { 124 synchronized (VALUE_TO_PATH_MUTEX) { 125 if (VALUE_TO_PATH != null) { 126 if (oldValue != null) { 127 VALUE_TO_PATH.remove(normalize(oldValue), distinguishingXPath); 128 } 129 if (newValue != null) { 130 VALUE_TO_PATH.put(normalize(newValue), distinguishingXPath); 131 } 132 } 133 } 134 } 135 136 @Override getPathsWithValue(String valueToMatch, String pathPrefix, Set<String> result)137 public void getPathsWithValue(String valueToMatch, String pathPrefix, Set<String> result) { 138 // build a Relation mapping value to paths, if needed 139 synchronized (VALUE_TO_PATH_MUTEX) { 140 if (VALUE_TO_PATH == null) { 141 VALUE_TO_PATH = Relation.of(new HashMap<String, Set<String>>(), HashSet.class); 142 for (Iterator<String> it = iterator(); it.hasNext();) { 143 String path = it.next(); 144 String value1 = getValueAtDPathSkippingInheritanceMarker(path); 145 if (value1 == null) { 146 continue; 147 } 148 String value = normalize(value1); 149 VALUE_TO_PATH.put(value, path); 150 } 151 } 152 Set<String> paths = VALUE_TO_PATH.getAll(normalize(valueToMatch)); 153 if (paths == null) { 154 return; 155 } 156 if (pathPrefix == null || pathPrefix.length() == 0) { 157 result.addAll(paths); 158 return; 159 } 160 for (String path : paths) { 161 if (path.startsWith(pathPrefix)) { 162 // if (altPath.originalPath.startsWith(altPrefix.originalPath)) { 163 result.add(path); 164 } 165 } 166 } 167 } 168 169 static final Normalizer2 NFKCCF = Normalizer2.getNFKCCasefoldInstance(); 170 static final Normalizer2 NFKC = Normalizer2.getNFKCInstance(); 171 172 // The following includes letters, marks, numbers, currencies, and *selected* symbols/punctuation 173 static final UnicodeSet NON_ALPHANUM = new UnicodeSet("[^[:L:][:M:][:N:][:Sc:][\\u202F\uFFFF _ ¡ « ( ) \\- \\[ \\] \\{ \\} § / \\\\ % ٪ ‰ ؉ ‱-″ ` \\^ ¯ ¨ ° + ¬ | ¦ ~ − ⊕ ⍰ ☉ © ®]]").freeze(); 174 normalize(String valueToMatch)175 public static String normalize(String valueToMatch) { 176 return normalize2(valueToMatch, NFKCCF); 177 } 178 normalizeCaseSensitive(String valueToMatch)179 public static String normalizeCaseSensitive(String valueToMatch) { 180 return normalize2(valueToMatch, NFKC); 181 } 182 normalize2(String valueToMatch, Normalizer2 normalizer2)183 public static String normalize2(String valueToMatch, Normalizer2 normalizer2) { 184 if (valueToMatch.indexOf('\u202F') >= 0) { // special hack to allow \u202f, which is otherwise removed by NFKC 185 String temp = valueToMatch.replace('\u202F', '\uFFFF'); 186 String result = replace(NON_ALPHANUM, normalizer2.normalize(temp), ""); 187 return result.replace('\uFFFF','\u202F'); 188 } 189 return replace(NON_ALPHANUM, normalizer2.normalize(valueToMatch), ""); 190 } 191 replace(UnicodeSet unicodeSet, String valueToMatch, String substitute)192 public static String replace(UnicodeSet unicodeSet, String valueToMatch, String substitute) { 193 // handle patterns 194 if (valueToMatch.contains("{")) { 195 valueToMatch = PLACEHOLDER.matcher(valueToMatch).replaceAll("⍰").trim(); 196 } 197 StringBuilder b = null; // delay creating until needed 198 for (int i = 0; i < valueToMatch.length(); ++i) { 199 int cp = valueToMatch.codePointAt(i); 200 if (unicodeSet.contains(cp)) { 201 if (b == null) { 202 b = new StringBuilder(); 203 b.append(valueToMatch.substring(0, i)); // copy the start 204 } 205 if (substitute.length() != 0) { 206 b.append(substitute); 207 } 208 } else if (b != null) { 209 b.appendCodePoint(cp); 210 } 211 if (cp > 0xFFFF) { // skip end of supplemental character 212 ++i; 213 } 214 } 215 if (b != null) { 216 valueToMatch = b.toString(); 217 } 218 return valueToMatch; 219 } 220 221 static final Pattern PLACEHOLDER = PatternCache.get("\\{\\d\\}"); 222 setDtdVersionInfo(VersionInfo dtdVersionInfo)223 public void setDtdVersionInfo(VersionInfo dtdVersionInfo) { 224 this.dtdVersionInfo = dtdVersionInfo; 225 } 226 227 @Override getDtdVersionInfo()228 public VersionInfo getDtdVersionInfo() { 229 return dtdVersionInfo; 230 } 231 232 private Map<String, SourceLocation> locationHash = new HashMap<>(); 233 234 @Override addSourceLocation(String currentFullXPath, SourceLocation location)235 public XMLSource addSourceLocation(String currentFullXPath, SourceLocation location) { 236 if (!isFrozen()) { 237 locationHash.put(currentFullXPath.intern(), location); 238 } else { 239 System.err.println("SimpleXMLSource::addSourceLocationAttempt to modify frozen source location"); 240 } 241 return this; 242 } 243 244 @Override getSourceLocation(String fullXPath)245 public SourceLocation getSourceLocation(String fullXPath) { 246 return locationHash.get(fullXPath); 247 } 248 } 249