• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  * Copyright (C) 2004-2005, International Business Machines Corporation and        *
4  * others. All Rights Reserved.                                               *
5  ******************************************************************************
6  *
7  *
8  */
9 package org.unicode.cldr.util;
10 
11 import java.util.Iterator;
12 import java.util.Map;
13 import java.util.TreeMap;
14 
15 import com.ibm.icu.text.Normalizer;
16 import com.ibm.icu.text.UTF16;
17 import com.ibm.icu.text.UnicodeSet;
18 
19 public class ExtractCollationRules {
20     Map<String, String> type_rules = new TreeMap<>();
21     StringBuffer rules = new StringBuffer();
22 
set(CLDRFile file)23     public ExtractCollationRules set(CLDRFile file) {
24         type_rules.clear();
25         String lastType = "";
26         rules.setLength(0);
27 
28         String context = null;
29 
30         for (Iterator it = file.iterator("//ldml/collations", file.getComparator()); it.hasNext();) {
31 
32             // System.out.print(rules.substring(lastLen, rules.length()));
33             // lastLen = rules.length();
34 
35             String path = (String) it.next();
36             String value = file.getStringValue(path);
37             XPathParts parts = XPathParts.getFrozenInstance(path);
38             String type = parts.findAttributeValue("collation", "type");
39             if (!type.equals(lastType)) {
40                 lastType = type;
41                 type_rules.put(lastType, rules.toString());
42                 rules.setLength(0);
43             }
44             String mainType = parts.getElement(3);
45             // base?, settings?, suppress_contractions?, optimize?
46             // x: context?, ( p | pc | s | sc | t | tc | i | ic )*, extend?
47             if (mainType.equals("settings")) {
48                 writeSettings(parts.getAttributes(3), rules);
49                 continue;
50             } else if (mainType.equals("rules")) {
51                 String ruleType = parts.getElement(4);
52                 char c = ruleType.charAt(0);
53                 if (c == 'x') {
54                     ruleType = parts.getElement(5);
55                     c = ruleType.charAt(0);
56                 }
57                 boolean isMultiple = ruleType.length() > 1 && ruleType.charAt(1) == 'c';
58                 String lastContext = context;
59                 context = null;
60                 switch (c) {
61                 case 'r':
62                     appendOrdering("&", null, value, false, true);
63                     break;
64                 case 'p':
65                     appendOrdering("<", lastContext, value, isMultiple, true);
66                     break;
67                 case 's':
68                     appendOrdering("<<", lastContext, value, isMultiple, true);
69                     break;
70                 case 't':
71                     appendOrdering("<<<", lastContext, value, isMultiple, false);
72                     break;
73                 case 'i':
74                     appendOrdering("=", lastContext, value, isMultiple, false);
75                     break;
76                 case 'c':
77                     context = value;
78                     break;
79                 case 'e':
80                     appendOrdering("/", null, value, false, false);
81                     break;
82                 default:
83                     System.out.println("Couldn't handle: " + path + "\t" + value);
84                 }
85                 continue;
86             } else {
87 
88             }
89             System.out.println("Couldn't handle: " + path + "\t" + value);
90         }
91         type_rules.put(lastType, rules.toString());
92         return this;
93     }
94 
appendOrdering(String relation, String context, String valueAfter, boolean isMultiple, boolean lineBreakBefore)95     private void appendOrdering(String relation, String context, String valueAfter, boolean isMultiple,
96         boolean lineBreakBefore) {
97         if (isMultiple) {
98             int cp;
99             for (int i = 0; i < valueAfter.length(); i += UTF16.getCharCount(cp)) {
100                 cp = UTF16.charAt(valueAfter, i);
101                 if (lineBreakBefore)
102                     rules.append(CldrUtility.LINE_SEPARATOR);
103                 else
104                     rules.append(' ');
105                 rules.append(relation);
106                 if (context != null) rules.append(' ').append(quote(context));
107                 rules.append(' ').append(quote(UTF16.valueOf(cp)));
108             }
109         } else {
110             if (lineBreakBefore)
111                 rules.append(CldrUtility.LINE_SEPARATOR);
112             else
113                 rules.append(' ');
114             rules.append(relation);
115             if (context != null) rules.append(' ').append(quote(context));
116             rules.append(' ').append(quote(valueAfter));
117         }
118     }
119 
writeSettings(Map<String, String> attributes, StringBuffer results)120     private void writeSettings(Map<String, String> attributes, StringBuffer results) {
121         for (Iterator<String> it = attributes.keySet().iterator(); it.hasNext();) {
122             String attribute = it.next();
123             String value = attributes.get(attribute);
124             // TODO fix different cases
125             results.append("[" + attribute + " " + value + "]" + CldrUtility.LINE_SEPARATOR);
126             // if (attribute.equals("normalization")) {
127             //
128             // }
129         }
130     }
131 
iterator()132     public Iterator<String> iterator() {
133         return type_rules.keySet().iterator();
134     }
135 
getRules(Object key)136     public String getRules(Object key) {
137         return type_rules.get(key);
138     }
139 
140     static StringBuffer quoteOperandBuffer = new StringBuffer(); // faster
141 
142     static UnicodeSet needsQuoting = null;
143     static UnicodeSet needsUnicodeForm = null;
144 
quote(String s)145     static final String quote(String s) {
146         if (needsQuoting == null) {
147             /*
148              * c >= 'a' && c <= 'z'
149              * || c >= 'A' && c <= 'Z'
150              * || c >= '0' && c <= '9'
151              * || (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
152              */
153             needsQuoting = new UnicodeSet(
154                 "[[:whitespace:][:c:][:z:][:ascii:]-[a-zA-Z0-9]]"); //
155             // "[[:ascii:]-[a-zA-Z0-9]-[:c:]-[:z:]]"); // [:whitespace:][:c:][:z:]
156             // for (int i = 0; i <= 0x10FFFF; ++i) {
157             // if (UCharacterProperty.isRuleWhiteSpace(i)) needsQuoting.add(i);
158             // }
159             // needsQuoting.remove();
160             needsUnicodeForm = new UnicodeSet("[\\u000d\\u000a[:zl:][:zp:]]");
161         }
162         s = Normalizer.compose(s, false);
163         quoteOperandBuffer.setLength(0);
164         boolean noQuotes = true;
165         boolean inQuote = false;
166         int cp;
167         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
168             cp = UTF16.charAt(s, i);
169             if (!needsQuoting.contains(cp)) {
170                 if (inQuote) {
171                     quoteOperandBuffer.append('\'');
172                     inQuote = false;
173                 }
174                 quoteOperandBuffer.append(UTF16.valueOf(cp));
175             } else {
176                 noQuotes = false;
177                 if (cp == '\'') {
178                     quoteOperandBuffer.append("''");
179                 } else {
180                     if (!inQuote) {
181                         quoteOperandBuffer.append('\'');
182                         inQuote = true;
183                     }
184                     if (!needsUnicodeForm.contains(cp))
185                         quoteOperandBuffer.append(UTF16.valueOf(cp)); // cp != 0x2028
186                     else if (cp > 0xFFFF) {
187                         quoteOperandBuffer.append("\\U").append(hex(cp, 8));
188                     } else if (cp <= 0x20 || cp > 0x7E) {
189                         quoteOperandBuffer.append("\\u").append(hex(cp, 4));
190                     } else {
191                         quoteOperandBuffer.append(UTF16.valueOf(cp));
192                     }
193                 }
194             }
195             /*
196              * switch (c) {
197              * case '<': case '>': case '#': case '=': case '&': case '/':
198              * quoteOperandBuffer.append('\'').append(c).append('\'');
199              * break;
200              * case '\'':
201              * quoteOperandBuffer.append("''");
202              * break;
203              * default:
204              * if (0 <= c && c < 0x20 || 0x7F <= c && c < 0xA0) {
205              * quoteOperandBuffer.append("\\u").append(Utility.hex(c));
206              * break;
207              * }
208              * quoteOperandBuffer.append(c);
209              * break;
210              * }
211              */
212         }
213         if (inQuote) {
214             quoteOperandBuffer.append('\'');
215         }
216         if (noQuotes) return s; // faster
217         return quoteOperandBuffer.toString();
218     }
219 
hex(long i, int places)220     static public String hex(long i, int places) {
221         if (i == Long.MIN_VALUE) return "-8000000000000000";
222         boolean negative = i < 0;
223         if (negative) {
224             i = -i;
225         }
226         String result = Long.toString(i, 16).toUpperCase();
227         if (result.length() < places) {
228             result = "0000000000000000".substring(result.length(), places) + result;
229         }
230         if (negative) {
231             return '-' + result;
232         }
233         return result;
234     }
235 
236 }
237