1 package org.unicode.cldr.json; 2 3 import java.text.ParseException; 4 import java.util.ArrayList; 5 6 import org.unicode.cldr.json.LdmlConvertRules.SplittableAttributeSpec; 7 import org.unicode.cldr.util.DtdData; 8 import org.unicode.cldr.util.DtdType; 9 import org.unicode.cldr.util.XPathParts; 10 import org.unicode.cldr.util.ZoneParser; 11 12 /** 13 * A object to present a CLDR XML item. 14 */ 15 public class CldrItem implements Comparable<CldrItem> { 16 17 private static boolean DEBUG = false; 18 19 /** 20 * Split the path to an array of string, each string represent a segment. 21 * 22 * @param path 23 * The path of XML element. 24 * @return array of segments. 25 */ splitPathToSegments(String path)26 private static String[] splitPathToSegments(String path) { 27 // remove leading // 28 if (path.startsWith("//")) { 29 path = path.substring(2); 30 } 31 32 int start = 0; 33 ArrayList<String> segments = new ArrayList<>(); 34 boolean inBracket = false; 35 boolean inBracketQuote = false; 36 for (int pos = start; pos < path.length(); ++pos) { 37 char ch = path.charAt(pos); 38 if (inBracketQuote) { 39 if (ch == '"') { 40 inBracketQuote = false; 41 } 42 } else if (inBracket) { 43 if (ch == ']') { 44 inBracket = false; 45 } else if (ch == '"') { 46 inBracketQuote = true; 47 } 48 } else { 49 if (ch == '[') { 50 inBracket = true; 51 } else if (ch == '/') { 52 segments.add(path.substring(start, pos)); 53 start = pos + 1; 54 } 55 } 56 } 57 segments.add(path.substring(start, path.length())); 58 59 return segments.toArray(new String[segments.size()]); 60 } 61 62 /** 63 * The full path of a CLDR item. 64 * 65 * Comparing to path, this full contains non-distinguishable attributes. 66 */ 67 private String fullPath; 68 69 /** 70 * The resolution path of a CLDR item. 71 * 72 * This path only contains distinguishable attributes that are necessary to 73 * identify a CLDR XML item in the CLDR tree. 74 */ 75 private String path; 76 77 /** 78 * The full path of a CLDR item. 79 * 80 * Comparing to path, this full contains non-distinguishable attributes. 81 */ 82 private String untransformedFullPath; 83 84 /** 85 * The resolution path of a CLDR item. 86 * 87 * This path only contains distinguishable attributes that are necessary to 88 * identify a CLDR XML item in the CLDR tree. 89 */ 90 private String untransformedPath; 91 92 /** 93 * The value of this CLDR item. 94 */ 95 private String value; 96 CldrItem(final String path, String fullPath, String untransformedPath, String untransformedFullPath, String value)97 CldrItem(final String path, String fullPath, String untransformedPath, String untransformedFullPath, String value) { 98 99 if (DEBUG) { 100 System.out.println("---"); 101 System.out.println(" PATH => " + path); 102 System.out.println("FULLPATH => " + fullPath); 103 System.out.println(" VALUE => " + value); 104 System.out.println("---"); 105 } 106 107 if(path.isEmpty()) { 108 // Should not happen 109 throw new IllegalArgumentException("empty path with " + fullPath+"|"+untransformedPath+"|"+untransformedFullPath+ " = " + value ); 110 } 111 112 this.path = path; 113 this.fullPath = fullPath; 114 this.untransformedPath = untransformedPath; 115 this.untransformedFullPath = untransformedFullPath; 116 117 if (value == null) { 118 this.value = ""; 119 } else { 120 this.value = value; 121 } 122 } 123 getFullPath()124 public String getFullPath() { 125 return fullPath; 126 } 127 getPath()128 public String getPath() { 129 return path; 130 } 131 132 /** 133 * Obtain the sortKey string, construct it if not yet. 134 * 135 * @return sort key string. 136 */ 137 getValue()138 public String getValue() { 139 return value; 140 } 141 142 // Zone and time zone element has '/' in attribute value, like 143 // .../zone[@type="America/Adak"]/... 144 // Such element can not be converted to "zone-type-America/Adak" as it is 145 // not url safe. To deal with such issue, two segment are generated. It is 146 // like the original path is written as: 147 // .../zone/America/Adak/... 148 setValue(String value)149 public void setValue(String value) { 150 this.value = value; 151 } 152 setFullPath(String fullPath)153 public void setFullPath(String fullPath) { 154 this.fullPath = fullPath; 155 } 156 157 /** 158 * This function create a node list from a CLDR path. 159 * 160 * Mostly, the node has one-to-one correspondence with path segment. But there 161 * are special cases where one segment can be split to multiple nodes. If 162 * necessary, several segments can also be combined to one node. 163 * 164 * @return A list of node in strict parent-to-child order. 165 * @throws ParseException 166 */ getNodesInPath()167 public ArrayList<CldrNode> getNodesInPath() throws ParseException { 168 String[] pathSegments = splitPathToSegments(path); 169 String[] fullPathSegments = splitPathToSegments(fullPath); 170 assert (pathSegments.length == fullPathSegments.length); 171 ArrayList<CldrNode> nodesInPath = new ArrayList<>(); 172 173 String parent = ""; 174 for (int i = 0; i < pathSegments.length; i++) { 175 CldrNode node = CldrNode.createNode(parent, pathSegments[i], 176 fullPathSegments[i]); 177 178 // Zone and time zone element has '/' in attribute value, like 179 // .../zone[@type="America/Adak"]/... 180 // Such element can not be converted to "zone-type-America/Adak" as it is 181 // not url safe. To deal with such issue, two segment are generated. It is 182 // like the original path is written as: 183 // .../zone/America/Adak/... 184 String nodeName = node.getName(); 185 if (node.isTimezoneType()) { 186 nodesInPath.add(CldrNode.createNode(parent, node.getName(), 187 node.getName())); 188 String typeValue = node.getDistinguishingAttributes().get("type"); 189 typeValue = typeValue.replaceAll("Asia:Taipei", "Asia/Taipei"); 190 String[] segments = typeValue.split("/"); 191 for (int j = 0; j < segments.length; j++) { 192 CldrNode newNode = CldrNode.createNode(parent, node.getName(), 193 node.getName()); 194 if (j == segments.length - 1) { 195 newNode.getDistinguishingAttributes().putAll( 196 node.getDistinguishingAttributes()); 197 newNode.getDistinguishingAttributes().remove("type"); 198 } 199 newNode.getDistinguishingAttributes().put("type", segments[j]); 200 nodesInPath.add(newNode); 201 } 202 } else { 203 nodesInPath.add(node); 204 } 205 parent = nodeName; 206 } 207 return nodesInPath; 208 } 209 setPath(String path)210 public void setPath(String path) { 211 if(path.isEmpty()) { 212 throw new IllegalArgumentException("empty path"); 213 } 214 this.path = path; 215 } 216 217 /** 218 * Some CLDR items have attributes that should be split before 219 * transformation. For examples, item like: 220 * <calendarPreference territories="CN CX" ordering="gregorian chinese"/> 221 * should really be treated as 2 separate items: 222 * <calendarPreference territories="CN" ordering="gregorian chinese"/> 223 * <calendarPreference territories="CX" ordering="gregorian chinese"/> 224 * 225 * @return Array of CldrItem if it can be split, otherwise null. 226 */ split()227 public CldrItem[] split() { 228 XPathParts xpp = XPathParts.getFrozenInstance(path); 229 XPathParts fullxpp = XPathParts.getFrozenInstance(fullPath); 230 XPathParts untransformedxpp = XPathParts.getFrozenInstance(untransformedPath); 231 XPathParts untransformedfullxpp = XPathParts.getFrozenInstance(untransformedFullPath); 232 233 for (SplittableAttributeSpec s : LdmlConvertRules.SPLITTABLE_ATTRS) { 234 if (fullxpp.containsElement(s.element) && fullxpp.containsAttribute(s.attribute)) { 235 ArrayList<CldrItem> list = new ArrayList<>(); 236 String wordString = fullxpp.findAttributeValue(s.element, s.attribute); 237 String[] words = null; 238 words = wordString.trim().split("\\s+"); 239 for (String word : words) { 240 XPathParts newxpp = xpp.cloneAsThawed(); 241 XPathParts newfullxpp = fullxpp.cloneAsThawed(); 242 XPathParts untransformednewxpp = untransformedxpp.cloneAsThawed(); 243 XPathParts untransformednewfullxpp = untransformedfullxpp.cloneAsThawed(); 244 245 newxpp.setAttribute(s.element, s.attribute, word); 246 newfullxpp.setAttribute(s.element, s.attribute, word); 247 untransformednewxpp.setAttribute(s.element, s.attribute, word); 248 untransformednewfullxpp.setAttribute(s.element, s.attribute, word); 249 250 if (s.attrAsValueAfterSplit != null) { 251 String newValue = fullxpp.findAttributeValue(s.element, s.attrAsValueAfterSplit); 252 newxpp.removeAttribute(s.element, s.attrAsValueAfterSplit); 253 newxpp.removeAttribute(s.element, s.attribute); 254 newxpp.addElement(word); 255 newfullxpp.removeAttribute(s.element, s.attrAsValueAfterSplit); 256 newfullxpp.removeAttribute(s.element, s.attribute); 257 newfullxpp.addElement(word); 258 list.add(new CldrItem(newxpp.toString(), newfullxpp.toString(), untransformednewxpp.toString(), untransformednewfullxpp.toString(), 259 newValue)); 260 } else { 261 list.add(new CldrItem(newxpp.toString(), newfullxpp.toString(), untransformednewxpp.toString(), untransformednewfullxpp.toString(), 262 value)); 263 } 264 } 265 return list.toArray(new CldrItem[list.size()]); 266 } 267 } 268 return null; 269 } 270 271 /** 272 * Check if the element path contains any item that need to be sorted first. 273 * 274 * @return True if the element need to be sorted before further process. 275 */ needsSort()276 public boolean needsSort() { 277 for (String item : LdmlConvertRules.ELEMENT_NEED_SORT) { 278 XPathParts xpp = XPathParts.getFrozenInstance(path); 279 if (xpp.containsElement(item)) { 280 return true; 281 } 282 } 283 return false; 284 } 285 isAliasItem()286 public boolean isAliasItem() { 287 return path.endsWith("/alias"); 288 } 289 290 @Override compareTo(CldrItem otherItem)291 public int compareTo(CldrItem otherItem) { 292 XPathParts thisxpp = XPathParts.getFrozenInstance(untransformedPath); 293 XPathParts otherxpp = XPathParts.getFrozenInstance(otherItem.untransformedFullPath); 294 if (thisxpp.containsElement("zone") && otherxpp.containsElement("zone")) { 295 String[] thisZonePieces = thisxpp.findAttributeValue("zone", "type").split("/"); 296 String[] otherZonePieces = otherxpp.findAttributeValue("zone", "type").split("/"); 297 int result = ZoneParser.regionalCompare.compare(thisZonePieces[0], otherZonePieces[0]); 298 if (result != 0) { 299 return result; 300 } 301 result = thisZonePieces[1].compareTo(otherZonePieces[1]); 302 if (result != 0) { 303 return result; 304 } 305 } 306 307 DtdType fileDtdType; 308 if (thisxpp.getElement(0).equals("supplementalData")) { 309 fileDtdType = DtdType.supplementalData; 310 } else { 311 fileDtdType = DtdType.ldml; 312 } 313 int result = 0; 314 if (thisxpp.getElement(1).equals("weekData") && thisxpp.getElement(2).equals(otherxpp.getElement(2))) { 315 String thisTerritory = thisxpp.findFirstAttributeValue("territories"); 316 String otherTerritory = otherxpp.findFirstAttributeValue("territories"); 317 if (thisTerritory != null && otherTerritory != null) { 318 result = thisTerritory.compareTo(otherTerritory); 319 } 320 if (result != 0) { 321 return result; 322 } 323 } 324 if (thisxpp.getElement(1).equals("measurementData") && thisxpp.getElement(2).equals(otherxpp.getElement(2))) { 325 String thisCategory = thisxpp.findAttributeValue("measurementSystem", "category"); 326 if (thisCategory == null) { 327 thisCategory = ""; 328 } 329 String otherCategory = otherxpp.findAttributeValue("measurementSystem", "category"); 330 if (otherCategory == null) { 331 otherCategory = ""; 332 } 333 if (!thisCategory.equals(otherCategory)) { 334 result = thisCategory.compareTo(otherCategory); 335 return result; 336 } 337 String thisTerritory = thisxpp.findFirstAttributeValue("territories"); 338 String otherTerritory = otherxpp.findFirstAttributeValue("territories"); 339 if (thisTerritory != null && otherTerritory != null) { 340 result = thisTerritory.compareTo(otherTerritory); 341 } 342 if (result != 0) { 343 return result; 344 } 345 } 346 result = DtdData.getInstance(fileDtdType).getDtdComparator(null).compare(untransformedPath, otherItem.untransformedPath); 347 return result; 348 //return CLDRFile.getLdmlComparator().compare(path, otherItem.path); 349 //return path.compareTo(otherItem.path); 350 } 351 } 352