package org.unicode.cldr.json;

import com.ibm.icu.impl.Utility;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;

/** CldrNode represents an element in XML as it appears in a CldrItem's path. */
public class CldrNode {

    /**
     * Creates a node that is not associated with any {@link CldrItem}.
     *
     * @param parent name of the parent element.
     * @param pathSegment path segment whose attributes are recorded as distinguishing.
     * @param fullPathSegment path segment whose attributes are recorded as non-distinguishing
     *     (minus any that also appear in {@code pathSegment}).
     * @return the newly created node.
     * @throws ParseException if either segment is malformed, or if the element names of the two
     *     segments differ.
     */
    public static CldrNode createNode(String parent, String pathSegment, String fullPathSegment)
            throws ParseException {
        return createNode(parent, pathSegment, fullPathSegment, null);
    }

    /**
     * Creates a node from a path segment and its full counterpart.
     *
     * @param parent name of the parent element.
     * @param pathSegment path segment whose attributes are recorded as distinguishing.
     * @param fullPathSegment path segment whose attributes are recorded as non-distinguishing
     *     (minus any that also appear in {@code pathSegment}).
     * @param item the item this node belongs to; may be null.
     * @return the newly created node.
     * @throws ParseException if either segment is malformed, or if the element names of the two
     *     segments differ.
     */
    public static CldrNode createNode(
            String parent, String pathSegment, String fullPathSegment, CldrItem item)
            throws ParseException {
        CldrNode node = new CldrNode();
        node.item = item;
        node.parent = parent;
        node.name = extractAttrs(pathSegment, node.distinguishingAttributes);
        String fullTrunk = extractAttrs(fullPathSegment, node.nondistinguishingAttributes);
        if (!node.name.equals(fullTrunk)) {
            throw new ParseException(
                    "Error in parsing \""
                            + pathSegment
                            + "\":\""
                            + fullPathSegment
                            + " - "
                            + node.name
                            + " != "
                            + fullTrunk,
                    0);
        }

        // An attribute already recorded as distinguishing must not be reported a second time
        // as non-distinguishing.
        node.nondistinguishingAttributes
                .keySet()
                .removeAll(node.distinguishingAttributes.keySet());

        String[] suppressList = LdmlConvertRules.ATTR_SUPPRESS_LIST;

        // let's check if there is anything that can be suppressed
        // TODO: should hash the parent and pathSegment values so we don't have to linear
        // search.
        // The list is read in triples: [i] element name, [i + 1] attribute value,
        // [i + 2] attribute name.
        for (int i = 0; i < suppressList.length; i += 3) {
            if (node.name.equals(suppressList[i])) {
                String key = suppressList[i + 2];
                String value = node.distinguishingAttributes.get(key);
                if (value != null && value.equals(suppressList[i + 1])) {
                    node.distinguishingAttributes.remove(key);
                }
            }
        }
        return node;
    }

    /**
     * Extract all the attributes and their value in the path.
     *
     * <p>Each attribute is expected in the form {@code [@name="value"]} or {@code [@name=value]}.
     *
     * @param pathSegment A complete or partial path.
     * @param attributes String map to receive attribute mapping.
     * @return Part of the string before the first attribute.
     * @throws ParseException if an attribute has no {@code '='} or is not terminated.
     */
    private static String extractAttrs(String pathSegment, Map<String, String> attributes)
            throws ParseException {
        int start = 0;

        String trunk = "";
        while (true) {
            int ind1 = pathSegment.indexOf("[@", start);
            if (ind1 < 0) {
                if (trunk.isEmpty()) {
                    trunk = pathSegment;
                }
                break;
            }
            if (trunk.isEmpty()) {
                trunk = pathSegment.substring(0, ind1);
            }
            ind1 += 2;
            int ind2 = pathSegment.indexOf("=", ind1);
            if (ind2 < 0) {
                throw new ParseException("Missing '=' in attribute specification.", ind1);
            }
            String attr = pathSegment.substring(ind1, ind2);

            ind1 = ind2 + 1;
            // A segment that ends right after '=' has no value at all; report it as a parse
            // error instead of letting charAt() fail with an index exception.
            if (ind1 >= pathSegment.length()) {
                throw new ParseException("Unexpected end in attribute specification.", ind1);
            }
            if (pathSegment.charAt(ind1) == '"') {
                ind1 += 1;
                ind2 = pathSegment.indexOf("\"]", ind1);
            } else {
                ind2 = pathSegment.indexOf("]", ind1);
            }

            if (ind2 < 0) {
                throw new ParseException("Unexpected end in attribute specification.", ind1);
            }

            String value = pathSegment.substring(ind1, ind2);

            // Continue scanning for the next "[@" from the end of this attribute.
            start = ind2;

            attributes.put(attr, value);
        }

        return trunk;
    }

    /** distinguishing attributes as identified by CLDR tools. */
    private Map<String, String> distinguishingAttributes;

    /** non-distinguishing attributes as identified by CLDR tools. */
    private Map<String, String> nondistinguishingAttributes;

    /** name of the element. */
    private String name;

    /** parent element for this element. */
    private String parent;

    /** CldrItem, if any */
    private CldrItem item;

    /**
     * Get the untransformed path of the item this node belongs to.
     *
     * @return the item's untransformed path, or the literal string {@code "noitem"} when this node
     *     has no item.
     */
    public String getUntransformedPath() {
        if (item != null) {
            return item.getUntransformedPath();
        } else {
            return "noitem";
        }
    }

    /**
     * This name is derived from element name and attributes. Once it is calculated, it is cached in
     * this variable.
     */
    private String uniqueNodeName;

    /** Creates an empty node; instances are obtained through {@code createNode}. */
    private CldrNode() {
        distinguishingAttributes = new HashMap<>();
        nondistinguishingAttributes = new HashMap<>();
    }

    /**
     * Get the string map for attributes that should be treated as values.
     *
     * <p>A new map is built on every call. If the collected attributes include {@code radix}, that
     * entry is dropped and every remaining key is rewritten as {@code key/radixValue}.
     *
     * @return String map.
     */
    public Map<String, String> getAttrAsValueMap() {
        Map<String, String> attributesAsValues = new HashMap<>();
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
            String keyStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(keyStr2)) {
                if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
                        || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStrMidStar)) {
                    attributesAsValues.put(LdmlConvertRules.ANONYMOUS_KEY, entry.getValue());
                } else {
                    attributesAsValues.put(key, entry.getValue());
                }
            }
        }

        for (Map.Entry<String, String> entry : nondistinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            if (LdmlConvertRules.IGNORABLE_NONDISTINGUISHING_ATTR_SET.contains(key)) {
                continue;
            }
            String keyStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String keyStrMidStar = LdmlConvertRules.getKeyStr(getParent(), "*", key);
            if (LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStr)
                    || LdmlConvertRules.COMPACTABLE_ATTR_AS_VALUE_SET.contains(keyStrMidStar)) {
                attributesAsValues.put(LdmlConvertRules.ANONYMOUS_KEY, entry.getValue());
            } else {
                attributesAsValues.put(key, entry.getValue());
            }
        }

        // ADJUST RADIX BASED ON ICU RULE
        final String radixValue = attributesAsValues.get("radix");
        if (radixValue != null) {
            attributesAsValues.remove("radix");
            // Re-key into a separate map: removing from and adding to a HashMap while iterating
            // its entry set fails with ConcurrentModificationException as soon as more than one
            // entry is present.
            Map<String, String> withRadix = new HashMap<>();
            for (Map.Entry<String, String> attribute : attributesAsValues.entrySet()) {
                withRadix.put(attribute.getKey() + "/" + radixValue, attribute.getValue());
            }
            return withRadix;
        }
        return attributesAsValues;
    }

    /**
     * Replace the distinguishing attribute map. The given map is stored as is, not copied.
     *
     * @param distinguishingAttributes the new distinguishing attributes.
     */
    public void setDistinguishingAttributes(Map<String, String> distinguishingAttributes) {
        this.distinguishingAttributes = distinguishingAttributes;
    }

    /**
     * Replace the non-distinguishing attribute map. The given map is stored as is, not copied.
     *
     * @param nondistinguishingAttributes the new non-distinguishing attributes.
     */
    public void setNondistinguishingAttributes(Map<String, String> nondistinguishingAttributes) {
        this.nondistinguishingAttributes = nondistinguishingAttributes;
    }

    /**
     * Get the distinguishing attributes.
     *
     * @return the internal map itself, not a copy.
     */
    public Map<String, String> getDistinguishingAttributes() {
        return distinguishingAttributes;
    }

    /**
     * Get the element name.
     *
     * @return name of the element.
     */
    public String getName() {
        return name;
    }

    /**
     * Get the non-distinguishing attributes.
     *
     * @return the internal map itself, not a copy.
     */
    public Map<String, String> getNondistinguishingAttributes() {
        return nondistinguishingAttributes;
    }

    /**
     * Construct a name that can be used as key in its container (by incorporating distinguishing
     * attributes).
     *
     * <p>Each segment in CLDR path corresponding to a XML element. Element name itself can not be
     * used as JSON key because it might not be unique in its container. A set of rules is used here
     * to construct this key name. Some of the attributes will be used in constructing the key name,
     * the remaining attributes are returned and should be used to fill the mapping.
     *
     * <p>The basic mapping is from {@code <element_name>[@<attr_name>=<attr_value>]+} to {@code
     * <element_name>-<attr_name>-<attr_value>}
     *
     * <p>The result is cached after the first call.
     *
     * @return A unique name that can be used as key in its container.
     * @throws IllegalArgumentException if more than one attribute value competes to be the main
     *     part of the name.
     */
    public String getNodeKeyName() {
        if (uniqueNodeName != null) {
            return uniqueNodeName;
        }

        // decide the main name
        StringBuilder strbuf = new StringBuilder();
        String lastKey = null; // for err message
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
                continue;
            }
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) { // with *
                continue;
            }

            if (!key.equals("alt")
                    && !key.equals("count")
                    && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
                // Only one attribute value may replace the element name as the main name.
                if (strbuf.length() != 0) {
                    throw new IllegalArgumentException(
                            "Can not have more than 1 key values in name: "
                                    + "both '"
                                    + strbuf
                                    + "' ("
                                    + lastKey
                                    + ") and '"
                                    + entry.getValue()
                                    + "' ("
                                    + key
                                    + "). attrIdStr="
                                    + attrIdStr
                                    + " - check LdmlConvertRules.java#NAME_PART_DISTINGUISHING_ATTR_SET");
                }
                strbuf.append(entry.getValue());
                lastKey = key;
            }
        }
        // No attribute supplied the main name, so fall back to the element name.
        if (strbuf.length() == 0) {
            strbuf.append(name);
        }

        // append distinguishing attributes
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            String key = entry.getKey();
            String attrIdStr = LdmlConvertRules.getKeyStr(getParent(), name, key);
            String attrIdStr2 = LdmlConvertRules.getKeyStr(name, key);
            if (LdmlConvertRules.IsSuppresedAttr(attrIdStr)) {
                continue;
            }
            if (LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr)
                    || LdmlConvertRules.ATTR_AS_VALUE_SET.contains(attrIdStr2)) {
                continue;
            }

            if (!key.equals("alt")
                    && !LdmlConvertRules.NAME_PART_DISTINGUISHING_ATTR_SET.contains(attrIdStr)) {
                continue;
            }
            strbuf.append("-");
            strbuf.append(key);
            strbuf.append("-");
            strbuf.append(entry.getValue());
        }
        uniqueNodeName = strbuf.toString();

        if (uniqueNodeName.length() == 1 && name.equals("character")) {
            // character attribute has value that can be any unicode character. Those
            // might not be url safe and can be difficult for user to specify. It is
            // converted to hex string here.
            uniqueNodeName = "U+" + Utility.hex(uniqueNodeName.charAt(0), 4);
        } else if (isTimezoneType()) {
            // time zone name has GMT+9 type of thing. "+" need to be removed to make
            // it URL safe.
            uniqueNodeName = uniqueNodeName.replaceFirst("\\+", "");
        }

        return uniqueNodeName;
    }

    /**
     * Construct a name that has all distinguishing attributes that should not be ignored.
     *
     * <p>Different from getNodeKeyName, this name includes those distinguishing attributes that
     * will be treated as values.
     *
     * @return A distinguishing name for differentiating element.
     */
    public String getNodeDistinguishingName() {
        // decide the main name
        StringBuilder strbuf = new StringBuilder();
        strbuf.append(name);

        // append distinguishing attributes
        for (Map.Entry<String, String> entry : distinguishingAttributes.entrySet()) {
            strbuf.append("-");
            strbuf.append(entry.getKey());
            strbuf.append("-");
            strbuf.append(entry.getValue());
        }
        return strbuf.toString();
    }

    /**
     * Tell whether this element is one of the time zone elements.
     *
     * @return true if the element name is in {@code LdmlConvertRules.TIMEZONE_ELEMENT_NAME_SET}.
     */
    public boolean isTimezoneType() {
        return LdmlConvertRules.TIMEZONE_ELEMENT_NAME_SET.contains(name);
    }

    @Override
    public String toString() {
        return "[CldrNode " + getParent() + "/" + getNodeDistinguishingName() + "]";
    }

    /**
     * Get the parent element name.
     *
     * @return name of the parent element.
     */
    public String getParent() {
        return parent;
    }
}