• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.tool;
2 
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.text.ParseException;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Date;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.TreeMap;
15 import java.util.regex.Matcher;
16 import java.util.regex.Pattern;
17 
18 import org.unicode.cldr.draft.FileUtilities;
19 import org.unicode.cldr.tool.Option.Options;
20 import org.unicode.cldr.util.CLDRPaths;
21 import org.unicode.cldr.util.ChainedMap;
22 import org.unicode.cldr.util.ChainedMap.M5;
23 import org.unicode.cldr.util.CldrUtility;
24 import org.unicode.cldr.util.Counter;
25 import org.unicode.cldr.util.Organization;
26 import org.unicode.cldr.util.Pair;
27 import org.unicode.cldr.util.VoteResolver.Level;
28 import org.unicode.cldr.util.XMLFileReader;
29 import org.unicode.cldr.util.XPathParts;
30 
31 import com.google.common.collect.ImmutableSet;
32 import com.ibm.icu.impl.Row.R2;
33 import com.ibm.icu.text.DateFormat;
34 import com.ibm.icu.text.SimpleDateFormat;
35 
36 public class ReadSql {
37     static final boolean DEBUG = false;
38     static UserMap umap = new UserMap(CLDRPaths.DATA_DIRECTORY + "cldr/users.xml");
39 
40     enum MyOptions {
41         organization(".*", "google", "organization"), verbose("", "", "verbose"),
42         ;
43 
44         // BOILERPLATE TO COPY
45         final Option option;
46 
MyOptions(String argumentPattern, String defaultArgument, String helpText)47         private MyOptions(String argumentPattern, String defaultArgument, String helpText) {
48             option = new Option(this, argumentPattern, defaultArgument, helpText);
49         }
50 
51         static Options options = new Options();
52         static {
53             for (MyOptions option : MyOptions.values()) {
options.add(option, option.option)54                 options.add(option, option.option);
55             }
56         }
57 
parse(String[] args, boolean showArguments)58         private static Set<String> parse(String[] args, boolean showArguments) {
59             return options.parse(MyOptions.values()[0], args, true);
60         }
61     }
62 
63     static Organization organization;
64     static boolean verbose;
65 
main(String[] args)66     public static void main(String[] args) throws IOException {
67         MyOptions.parse(args, true);
68         organization = Organization.valueOf(MyOptions.organization.option.getValue());
69         verbose = MyOptions.verbose.option.doesOccur();
70 
71         long max = Long.MAX_VALUE;
72         long maxItems = 10;
73         boolean inCreate = false;
74         try (BufferedReader r = FileUtilities.openFile(CLDRPaths.DATA_DIRECTORY, "cldr/cldr-DUMP-20160817.sql")) {
75             while (--max > 0) {
76                 String line = r.readLine();
77                 if (line == null) {
78                     break;
79                 }
80                 if (line.startsWith("INSERT")) {
81                     //System.out.println(trunc(line, 100));
82                     Data.parseLine(line, maxItems);
83                 } else if (line.startsWith("CREATE")) {
84                     inCreate = true;
85                     if (verbose) System.out.println(line);
86                 } else if (inCreate) {
87                     if (verbose) System.out.println(line);
88                     if (line.startsWith(") ENGINE")) {
89                         inCreate = false;
90                     }
91                 } else if (DEBUG) {
92                     if (verbose) System.out.println(line);
93                 }
94             }
95         }
96         Counter<String> keys = Data.getKeys();
97         for (R2<Long, String> e : keys.getEntrySetSortedByCount(false, null)) {
98             if (e.get0() > 0) {
99                 System.out.println(CldrUtility.toString(e));
100             }
101         }
102         Data.show("_30");
103     }
104 
trunc(String line, int len)105     private static String trunc(String line, int len) {
106         return line.length() <= len ? line : line.substring(0, len) + "…";
107     }
108 
109     static final Pattern INSERT = Pattern.compile("INSERT\\s+INTO\\s+`([^`]+)`\\s+VALUES\\s*");
110     static final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // 2014-05-01 17:19:57
111 
112     static class Items {
113         final Date date;
114         final User owner;
115         final List<String> raw;
116 
of(String key, List<String> raw)117         public static Items of(String key, List<String> raw) {
118             try {
119                 return new Items(key, raw);
120             } catch (Exception e) {
121                 System.out.println("No user for: " + key + ": " + raw);
122                 return null;
123             }
124         }
125 
Items(String key, List<String> raw)126         private Items(String key, List<String> raw) {
127             Date temp;
128             try {
129                 temp = df.parse(raw.get(raw.size() - 1));
130             } catch (ParseException e) {
131                 temp = null;
132             }
133             this.date = temp;
134             this.raw = raw;
135             if (temp == null) {
136                 owner = null;
137             } else {
138                 String ownerField;
139                 switch (key) {
140                 case "FEEDBACK":
141                     ownerField = raw.get(1);
142                     break;
143                 default:
144                     ownerField = raw.get(2);
145                     break;
146                 }
147                 owner = umap.get(ownerField);
148             }
149         }
150 
151         @Override
toString()152         public String toString() {
153             return (date == null ? "???" : df.format(date)) + ";\t" + owner + ";\t" + CldrUtility.toString(raw);
154         }
155     }
156 
157     static class DateMap {
158         M5<Integer, Integer, Integer, Integer, Boolean> yearMonthDays = ChainedMap.of(new TreeMap<>(), new TreeMap(), new TreeMap(), new TreeMap(),
159             Boolean.class);
160         int current = 0;
161 
add(Date d)162         void add(Date d) {
163             yearMonthDays.put(d.getYear() + 1900, d.getMonth() + 1, d.getDate(), current++, Boolean.TRUE);
164         }
165 
166         static DateFormat monthFormat = new SimpleDateFormat("MMM");
167 
168         @Override
toString()169         public String toString() {
170             StringBuilder result = new StringBuilder();
171             int years = 0;
172             for (Entry<Integer, Map<Integer, Map<Integer, Map<Integer, Boolean>>>> yearMonthDay : yearMonthDays) {
173                 if (years++ > 0) {
174                     result.append("; ");
175                 }
176                 final int year = yearMonthDay.getKey();
177                 result.append(year);
178                 result.append(": ");
179                 int months = 0;
180                 for (Entry<Integer, Map<Integer, Map<Integer, Boolean>>> monthDay : yearMonthDay.getValue().entrySet()) {
181                     if (months++ > 0) {
182                         result.append("; ");
183                     }
184                     final int month = monthDay.getKey();
185                     result.append(monthFormat.format(new Date(year - 1900, month - 1, 1)));
186                     result.append(": ");
187                     int days = 0;
188                     for (Entry<Integer, Map<Integer, Boolean>> dayCount : monthDay.getValue().entrySet()) {
189                         if (days++ > 0) {
190                             result.append(", ");
191                         }
192                         final int day = dayCount.getKey();
193                         result.append(day);
194                         final int count = dayCount.getValue().size();
195                         if (count > 1) {
196                             result.append('(');
197                             result.append(count);
198                             result.append(")");
199                         }
200                     }
201                 }
202             }
203             return result.toString();
204         }
205     }
206 
207     static class Data {
208         final String key;
209         final List<Items> dataItems = new ArrayList<>();
210         static Map<String, Data> map = new TreeMap<>();
211 
Data(String key)212         public Data(String key) {
213             this.key = key;
214         }
215 
getKeys()216         public static Counter<String> getKeys() {
217             Counter<String> items = new Counter();
218             for (Entry<String, Data> e : map.entrySet()) {
219                 items.add(e.getKey(), e.getValue().dataItems.size());
220             }
221             return items;
222         }
223 
show(String regex)224         public static void show(String regex) {
225             Matcher m = Pattern.compile(regex).matcher("");
226 
227             for (Entry<String, Data> e : map.entrySet()) {
228                 Data data = e.getValue();
229                 if (!m.reset(data.key).find()) {
230                     continue;
231                 }
232                 Counter<User> counter = new Counter<>();
233                 Map<User, DateMap> dateMaps = new HashMap<>();
234                 for (Items item : data.dataItems) {
235                     if (item.owner.org == organization) {
236                         counter.add(item.owner, 1);
237                         DateMap dateMap = dateMaps.get(item.owner);
238                         if (dateMap == null) {
239                             dateMaps.put(item.owner, dateMap = new DateMap());
240                         }
241                         dateMap.add(item.date);
242                     }
243                 }
244                 for (R2<Long, User> item : counter.getEntrySetSortedByCount(false, null)) {
245                     final Long count = item.get0();
246                     final User user = item.get1();
247                     System.out.println("key: " + data.key + "; count: " + count + "; " + user + "\t" + dateMaps.get(user));
248                 }
249             }
250         }
251 
252         @Override
toString()253         public String toString() {
254             return key + "=" + CldrUtility.toString(dataItems);
255         }
256 
add(ArrayList<String> items)257         public Items add(ArrayList<String> items) {
258             final Items items2 = Items.of(key, items);
259             if (items2 != null && items2.owner != null) {
260                 dataItems.add(items2);
261                 return items2;
262             }
263             return null;
264         }
265 
parseLine(String line, long maxItems)266         static void parseLine(String line, long maxItems) {
267             Matcher m = INSERT.matcher(line);
268             String key;
269             int i;
270             if (m.lookingAt()) {
271                 key = m.group(1);
272                 i = m.end();
273             } else {
274                 throw new IllegalArgumentException();
275             }
276             if (key.equals("FEEDBACK") || key.equals("sf_fora")) { // cf. private FeedBack.TABLE_FEEDBACK and public SurveyForum.DB_FORA
277                 return; // old format
278             }
279             boolean inQuote = false;
280             boolean skipComma = true;
281             StringBuilder buffer = new StringBuilder();
282             ArrayList<String> items = new ArrayList<>();
283             Data current = map.get(key);
284             if (current == null) {
285                 map.put(key, current = new Data(key));
286             }
287             ArrayList<Data> rows = new ArrayList<>();
288 
289             while (i < line.length()) {
290                 int cp = line.codePointAt(i);
291                 i += Character.charCount(cp);
292                 if (inQuote) {
293                     switch (cp) {
294                     case '\'':
295                         inQuote = false;
296                         break;
297                     case '\\':
298                         cp = line.codePointAt(i);
299                         i += Character.charCount(cp);
300                         // fall through
301                     default:
302                         buffer.appendCodePoint(cp);
303                         break;
304                     }
305                 } else {
306                     switch (cp) {
307                     case '\'':
308                         inQuote = true;
309                         break;
310                     case ',':
311                         if (!skipComma) {
312                             items.add(buffer.toString());
313                             buffer.setLength(0);
314                         }
315                         break;
316                     case ' ':
317                     case '\t':
318                     case '\n':
319                     case '(':
320                         skipComma = false;
321                         break;
322                     case ')':
323                         skipComma = true;
324                         items.add(buffer.toString());
325                         buffer.setLength(0);
326                         Items lastItem = current.add(items);
327                         if (--maxItems > 0 && lastItem != null) {
328                             if (verbose) System.out.println(key + "\t" + lastItem);
329                         }
330                         items = new ArrayList<>();
331                         break;
332                     case '\\':
333                         cp = line.codePointAt(i);
334                         i += Character.charCount(cp);
335                         // fall through
336                     default:
337                         buffer.appendCodePoint(cp);
338                         break;
339                     }
340                 }
341             }
342         }
343     }
344 
345     static class User {
346         final int id;
347         final String email;
348         final Level level;
349         final String name;
350         final Organization org;
351         final Set<String> locales;
352 
User(XPathParts parts)353         public User(XPathParts parts) {
354             this.id = Integer.parseInt(parts.getAttributeValue(-1, "id"));
355             this.email = parts.getAttributeValue(-1, "email");
356             this.level = Level.valueOf(parts.getAttributeValue(-1, "level"));
357             this.name = parts.getAttributeValue(-1, "name");
358             this.org = Organization.fromString(parts.getAttributeValue(-1, "org"));
359             this.locales = ImmutableSet.copyOf(Arrays.asList(parts.getAttributeValue(-1, "locales").split("[, ]+")));
360         }
361 
362         @Override
toString()363         public String toString() {
364             return "id: " + id
365                 + "; email: " + email
366                 + "; name: " + name
367                 + "; level: " + level
368                 + "; org: " + org
369                 + "; locales: " + locales;
370         }
371     }
372 
373     static class UserMap {
374         Map<Integer, User> map = new HashMap<>();
375 
UserMap(String filename)376         UserMap(String filename) {
377             List<Pair<String, String>> data = new ArrayList<>();
378             XMLFileReader.loadPathValues(filename, data, false);
379             //  <user id="1271" email="..." level="tc" name="..." org="adobe" locales="pt"/>
380             for (Pair<String, String> e : data) {
381                 String path = e.getFirst();
382                 XPathParts parts = XPathParts.getFrozenInstance(path);
383                 User user = new User(parts);
384                 map.put(user.id, user);
385             }
386         }
387 
get(String string)388         public User get(String string) {
389             return map.get(Integer.valueOf(string));
390         }
391     }
392 }
393