• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.json;
2 
3 import java.text.ParseException;
4 import java.util.ArrayList;
5 
6 import org.unicode.cldr.json.LdmlConvertRules.SplittableAttributeSpec;
7 import org.unicode.cldr.util.DtdData;
8 import org.unicode.cldr.util.DtdType;
9 import org.unicode.cldr.util.XPathParts;
10 import org.unicode.cldr.util.ZoneParser;
11 
12 /**
 * An object that represents a CLDR XML item.
14  */
15 public class CldrItem implements Comparable<CldrItem> {
16 
    // Debug switch: when true, the constructor prints the path, full path and
    // value of every item it creates to standard output.
    private static boolean DEBUG = false;
18 
19     /**
20      * Split the path to an array of string, each string represent a segment.
21      *
22      * @param path
23      *            The path of XML element.
24      * @return array of segments.
25      */
splitPathToSegments(String path)26     private static String[] splitPathToSegments(String path) {
27         // remove leading //
28         if (path.startsWith("//")) {
29             path = path.substring(2);
30         }
31 
32         int start = 0;
33         ArrayList<String> segments = new ArrayList<>();
34         boolean inBracket = false;
35         boolean inBracketQuote = false;
36         for (int pos = start; pos < path.length(); ++pos) {
37             char ch = path.charAt(pos);
38             if (inBracketQuote) {
39                 if (ch == '"') {
40                     inBracketQuote = false;
41                 }
42             } else if (inBracket) {
43                 if (ch == ']') {
44                     inBracket = false;
45                 } else if (ch == '"') {
46                     inBracketQuote = true;
47                 }
48             } else {
49                 if (ch == '[') {
50                     inBracket = true;
51                 } else if (ch == '/') {
52                     segments.add(path.substring(start, pos));
53                     start = pos + 1;
54                 }
55             }
56         }
57         segments.add(path.substring(start, path.length()));
58 
59         return segments.toArray(new String[segments.size()]);
60     }
61 
    /**
     * The full path of a CLDR item.
     *
     * Unlike {@link #path}, the full path also contains non-distinguishing
     * attributes.
     */
    private String fullPath;

    /**
     * The resolution path of a CLDR item.
     *
     * This path only contains the distinguishing attributes that are necessary
     * to identify a CLDR XML item in the CLDR tree.
     */
    private String path;

    /**
     * The untransformed counterpart of {@link #fullPath}.
     *
     * NOTE(review): presumably the full path as it was before any path
     * transformation was applied; confirm against the code that constructs
     * CldrItem instances.
     */
    private String untransformedFullPath;

    /**
     * The untransformed counterpart of {@link #path}. This is the path that
     * {@link #compareTo(CldrItem)} hands to the DTD comparator.
     *
     * NOTE(review): presumably the distinguishing path as it was before any
     * path transformation was applied; confirm against the code that
     * constructs CldrItem instances.
     */
    private String untransformedPath;

    /**
     * The value of this CLDR item. The constructor replaces a null value with
     * the empty string.
     */
    private String value;
96 
CldrItem(final String path, String fullPath, String untransformedPath, String untransformedFullPath, String value)97     CldrItem(final String path, String fullPath, String untransformedPath, String untransformedFullPath, String value) {
98 
99         if (DEBUG) {
100             System.out.println("---");
101             System.out.println("    PATH => " + path);
102             System.out.println("FULLPATH => " + fullPath);
103             System.out.println("   VALUE => " + value);
104             System.out.println("---");
105         }
106 
107         if(path.isEmpty()) {
108             // Should not happen
109             throw new IllegalArgumentException("empty path with " + fullPath+"|"+untransformedPath+"|"+untransformedFullPath+ " = " + value );
110         }
111 
112         this.path = path;
113         this.fullPath = fullPath;
114         this.untransformedPath = untransformedPath;
115         this.untransformedFullPath = untransformedFullPath;
116 
117         if (value == null) {
118             this.value = "";
119         } else {
120             this.value = value;
121         }
122     }
123 
getFullPath()124     public String getFullPath() {
125         return fullPath;
126     }
127 
getPath()128     public String getPath() {
129         return path;
130     }
131 
132     /**
133      * Obtain the sortKey string, construct it if not yet.
134      *
135      * @return sort key string.
136      */
137 
getValue()138     public String getValue() {
139         return value;
140     }
141 
    // Zone and time zone elements have '/' in their attribute values, like
    // .../zone[@type="America/Adak"]/...
    // Such an element cannot be converted to "zone-type-America/Adak" because
    // that is not URL-safe. To deal with this, two segments are generated, as
    // if the original path had been written as:
    // .../zone/America/Adak/...
    // (This splitting is performed in getNodesInPath().)
148 
setValue(String value)149     public void setValue(String value) {
150         this.value = value;
151     }
152 
setFullPath(String fullPath)153     public void setFullPath(String fullPath) {
154         this.fullPath = fullPath;
155     }
156 
157     /**
158      * This function create a node list from a CLDR path.
159      *
160      * Mostly, the node has one-to-one correspondence with path segment. But there
161      * are special cases where one segment can be split to multiple nodes. If
162      * necessary, several segments can also be combined to one node.
163      *
164      * @return A list of node in strict parent-to-child order.
165      * @throws ParseException
166      */
getNodesInPath()167     public ArrayList<CldrNode> getNodesInPath() throws ParseException {
168         String[] pathSegments = splitPathToSegments(path);
169         String[] fullPathSegments = splitPathToSegments(fullPath);
170         assert (pathSegments.length == fullPathSegments.length);
171         ArrayList<CldrNode> nodesInPath = new ArrayList<>();
172 
173         String parent = "";
174         for (int i = 0; i < pathSegments.length; i++) {
175             CldrNode node = CldrNode.createNode(parent, pathSegments[i],
176                 fullPathSegments[i]);
177 
178             // Zone and time zone element has '/' in attribute value, like
179             // .../zone[@type="America/Adak"]/...
180             // Such element can not be converted to "zone-type-America/Adak" as it is
181             // not url safe. To deal with such issue, two segment are generated. It is
182             // like the original path is written as:
183             // .../zone/America/Adak/...
184             String nodeName = node.getName();
185             if (node.isTimezoneType()) {
186                 nodesInPath.add(CldrNode.createNode(parent, node.getName(),
187                     node.getName()));
188                 String typeValue = node.getDistinguishingAttributes().get("type");
189                 typeValue = typeValue.replaceAll("Asia:Taipei", "Asia/Taipei");
190                 String[] segments = typeValue.split("/");
191                 for (int j = 0; j < segments.length; j++) {
192                     CldrNode newNode = CldrNode.createNode(parent, node.getName(),
193                         node.getName());
194                     if (j == segments.length - 1) {
195                         newNode.getDistinguishingAttributes().putAll(
196                             node.getDistinguishingAttributes());
197                         newNode.getDistinguishingAttributes().remove("type");
198                     }
199                     newNode.getDistinguishingAttributes().put("type", segments[j]);
200                     nodesInPath.add(newNode);
201                 }
202             } else {
203                 nodesInPath.add(node);
204             }
205             parent = nodeName;
206         }
207         return nodesInPath;
208     }
209 
setPath(String path)210     public void setPath(String path) {
211         if(path.isEmpty()) {
212             throw new IllegalArgumentException("empty path");
213         }
214         this.path = path;
215     }
216 
217     /**
218      * Some CLDR items have attributes that should be split before
219      * transformation. For examples, item like:
220      * <calendarPreference territories="CN CX" ordering="gregorian chinese"/>
221      * should really be treated as 2 separate items:
222      * <calendarPreference territories="CN" ordering="gregorian chinese"/>
223      * <calendarPreference territories="CX" ordering="gregorian chinese"/>
224      *
225      * @return Array of CldrItem if it can be split, otherwise null.
226      */
split()227     public CldrItem[] split() {
228         XPathParts xpp = XPathParts.getFrozenInstance(path);
229         XPathParts fullxpp = XPathParts.getFrozenInstance(fullPath);
230         XPathParts untransformedxpp = XPathParts.getFrozenInstance(untransformedPath);
231         XPathParts untransformedfullxpp = XPathParts.getFrozenInstance(untransformedFullPath);
232 
233         for (SplittableAttributeSpec s : LdmlConvertRules.SPLITTABLE_ATTRS) {
234             if (fullxpp.containsElement(s.element) && fullxpp.containsAttribute(s.attribute)) {
235                 ArrayList<CldrItem> list = new ArrayList<>();
236                 String wordString = fullxpp.findAttributeValue(s.element, s.attribute);
237                 String[] words = null;
238                 words = wordString.trim().split("\\s+");
239                 for (String word : words) {
240                     XPathParts newxpp = xpp.cloneAsThawed();
241                     XPathParts newfullxpp = fullxpp.cloneAsThawed();
242                     XPathParts untransformednewxpp = untransformedxpp.cloneAsThawed();
243                     XPathParts untransformednewfullxpp = untransformedfullxpp.cloneAsThawed();
244 
245                     newxpp.setAttribute(s.element, s.attribute, word);
246                     newfullxpp.setAttribute(s.element, s.attribute, word);
247                     untransformednewxpp.setAttribute(s.element, s.attribute, word);
248                     untransformednewfullxpp.setAttribute(s.element, s.attribute, word);
249 
250                     if (s.attrAsValueAfterSplit != null) {
251                         String newValue = fullxpp.findAttributeValue(s.element, s.attrAsValueAfterSplit);
252                         newxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
253                         newxpp.removeAttribute(s.element, s.attribute);
254                         newxpp.addElement(word);
255                         newfullxpp.removeAttribute(s.element, s.attrAsValueAfterSplit);
256                         newfullxpp.removeAttribute(s.element, s.attribute);
257                         newfullxpp.addElement(word);
258                         list.add(new CldrItem(newxpp.toString(), newfullxpp.toString(), untransformednewxpp.toString(), untransformednewfullxpp.toString(),
259                             newValue));
260                     } else {
261                         list.add(new CldrItem(newxpp.toString(), newfullxpp.toString(), untransformednewxpp.toString(), untransformednewfullxpp.toString(),
262                             value));
263                     }
264                 }
265                 return list.toArray(new CldrItem[list.size()]);
266             }
267         }
268         return null;
269     }
270 
271     /**
272      * Check if the element path contains any item that need to be sorted first.
273      *
274      * @return True if the element need to be sorted before further process.
275      */
needsSort()276     public boolean needsSort() {
277         for (String item : LdmlConvertRules.ELEMENT_NEED_SORT) {
278             XPathParts xpp = XPathParts.getFrozenInstance(path);
279             if (xpp.containsElement(item)) {
280                 return true;
281             }
282         }
283         return false;
284     }
285 
isAliasItem()286     public boolean isAliasItem() {
287         return path.endsWith("/alias");
288     }
289 
290     @Override
compareTo(CldrItem otherItem)291     public int compareTo(CldrItem otherItem) {
292         XPathParts thisxpp = XPathParts.getFrozenInstance(untransformedPath);
293         XPathParts otherxpp = XPathParts.getFrozenInstance(otherItem.untransformedFullPath);
294         if (thisxpp.containsElement("zone") && otherxpp.containsElement("zone")) {
295             String[] thisZonePieces = thisxpp.findAttributeValue("zone", "type").split("/");
296             String[] otherZonePieces = otherxpp.findAttributeValue("zone", "type").split("/");
297             int result = ZoneParser.regionalCompare.compare(thisZonePieces[0], otherZonePieces[0]);
298             if (result != 0) {
299                 return result;
300             }
301             result = thisZonePieces[1].compareTo(otherZonePieces[1]);
302             if (result != 0) {
303                 return result;
304             }
305         }
306 
307         DtdType fileDtdType;
308         if (thisxpp.getElement(0).equals("supplementalData")) {
309             fileDtdType = DtdType.supplementalData;
310         } else {
311             fileDtdType = DtdType.ldml;
312         }
313         int result = 0;
314         if (thisxpp.getElement(1).equals("weekData") && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
315             String thisTerritory = thisxpp.findFirstAttributeValue("territories");
316             String otherTerritory = otherxpp.findFirstAttributeValue("territories");
317             if (thisTerritory != null && otherTerritory != null) {
318                 result = thisTerritory.compareTo(otherTerritory);
319             }
320             if (result != 0) {
321                 return result;
322             }
323         }
324         if (thisxpp.getElement(1).equals("measurementData") && thisxpp.getElement(2).equals(otherxpp.getElement(2))) {
325             String thisCategory = thisxpp.findAttributeValue("measurementSystem", "category");
326             if (thisCategory == null) {
327                 thisCategory = "";
328             }
329             String otherCategory = otherxpp.findAttributeValue("measurementSystem", "category");
330             if (otherCategory == null) {
331                 otherCategory = "";
332             }
333             if (!thisCategory.equals(otherCategory)) {
334                 result = thisCategory.compareTo(otherCategory);
335                 return result;
336             }
337             String thisTerritory = thisxpp.findFirstAttributeValue("territories");
338             String otherTerritory = otherxpp.findFirstAttributeValue("territories");
339             if (thisTerritory != null && otherTerritory != null) {
340                 result = thisTerritory.compareTo(otherTerritory);
341             }
342             if (result != 0) {
343                 return result;
344             }
345         }
346         result = DtdData.getInstance(fileDtdType).getDtdComparator(null).compare(untransformedPath, otherItem.untransformedPath);
347         return result;
348         //return CLDRFile.getLdmlComparator().compare(path, otherItem.path);
349         //return path.compareTo(otherItem.path);
350     }
351 }
352