1 package org.unicode.cldr.tool; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.text.ParseException; 6 import java.util.ArrayList; 7 import java.util.Arrays; 8 import java.util.Date; 9 import java.util.HashMap; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.TreeMap; 15 import java.util.regex.Matcher; 16 import java.util.regex.Pattern; 17 18 import org.unicode.cldr.draft.FileUtilities; 19 import org.unicode.cldr.tool.Option.Options; 20 import org.unicode.cldr.util.CLDRPaths; 21 import org.unicode.cldr.util.ChainedMap; 22 import org.unicode.cldr.util.ChainedMap.M5; 23 import org.unicode.cldr.util.CldrUtility; 24 import org.unicode.cldr.util.Counter; 25 import org.unicode.cldr.util.Organization; 26 import org.unicode.cldr.util.Pair; 27 import org.unicode.cldr.util.VoteResolver.Level; 28 import org.unicode.cldr.util.XMLFileReader; 29 import org.unicode.cldr.util.XPathParts; 30 31 import com.google.common.collect.ImmutableSet; 32 import com.ibm.icu.impl.Row.R2; 33 import com.ibm.icu.text.DateFormat; 34 import com.ibm.icu.text.SimpleDateFormat; 35 36 public class ReadSql { 37 static final boolean DEBUG = false; 38 static UserMap umap = new UserMap(CLDRPaths.DATA_DIRECTORY + "cldr/users.xml"); 39 40 enum MyOptions { 41 organization(".*", "google", "organization"), verbose("", "", "verbose"), 42 ; 43 44 // BOILERPLATE TO COPY 45 final Option option; 46 MyOptions(String argumentPattern, String defaultArgument, String helpText)47 private MyOptions(String argumentPattern, String defaultArgument, String helpText) { 48 option = new Option(this, argumentPattern, defaultArgument, helpText); 49 } 50 51 static Options options = new Options(); 52 static { 53 for (MyOptions option : MyOptions.values()) { options.add(option, option.option)54 options.add(option, option.option); 55 } 56 } 57 parse(String[] args, boolean showArguments)58 private static Set<String> parse(String[] args, boolean showArguments) { 59 return options.parse(MyOptions.values()[0], args, true); 60 } 61 } 62 63 static Organization organization; 64 static boolean verbose; 65 main(String[] args)66 public static void main(String[] args) throws IOException { 67 MyOptions.parse(args, true); 68 organization = Organization.valueOf(MyOptions.organization.option.getValue()); 69 verbose = MyOptions.verbose.option.doesOccur(); 70 71 long max = Long.MAX_VALUE; 72 long maxItems = 10; 73 boolean inCreate = false; 74 try (BufferedReader r = FileUtilities.openFile(CLDRPaths.DATA_DIRECTORY, "cldr/cldr-DUMP-20160817.sql")) { 75 while (--max > 0) { 76 String line = r.readLine(); 77 if (line == null) { 78 break; 79 } 80 if (line.startsWith("INSERT")) { 81 //System.out.println(trunc(line, 100)); 82 Data.parseLine(line, maxItems); 83 } else if (line.startsWith("CREATE")) { 84 inCreate = true; 85 if (verbose) System.out.println(line); 86 } else if (inCreate) { 87 if (verbose) System.out.println(line); 88 if (line.startsWith(") ENGINE")) { 89 inCreate = false; 90 } 91 } else if (DEBUG) { 92 if (verbose) System.out.println(line); 93 } 94 } 95 } 96 Counter<String> keys = Data.getKeys(); 97 for (R2<Long, String> e : keys.getEntrySetSortedByCount(false, null)) { 98 if (e.get0() > 0) { 99 System.out.println(CldrUtility.toString(e)); 100 } 101 } 102 Data.show("_30"); 103 } 104 trunc(String line, int len)105 private static String trunc(String line, int len) { 106 return line.length() <= len ? line : line.substring(0, len) + "…"; 107 } 108 109 static final Pattern INSERT = Pattern.compile("INSERT\\s+INTO\\s+`([^`]+)`\\s+VALUES\\s*"); 110 static final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // 2014-05-01 17:19:57 111 112 static class Items { 113 final Date date; 114 final User owner; 115 final List<String> raw; 116 of(String key, List<String> raw)117 public static Items of(String key, List<String> raw) { 118 try { 119 return new Items(key, raw); 120 } catch (Exception e) { 121 System.out.println("No user for: " + key + ": " + raw); 122 return null; 123 } 124 } 125 Items(String key, List<String> raw)126 private Items(String key, List<String> raw) { 127 Date temp; 128 try { 129 temp = df.parse(raw.get(raw.size() - 1)); 130 } catch (ParseException e) { 131 temp = null; 132 } 133 this.date = temp; 134 this.raw = raw; 135 if (temp == null) { 136 owner = null; 137 } else { 138 String ownerField; 139 switch (key) { 140 case "FEEDBACK": 141 ownerField = raw.get(1); 142 break; 143 default: 144 ownerField = raw.get(2); 145 break; 146 } 147 owner = umap.get(ownerField); 148 } 149 } 150 151 @Override toString()152 public String toString() { 153 return (date == null ? "???" : df.format(date)) + ";\t" + owner + ";\t" + CldrUtility.toString(raw); 154 } 155 } 156 157 static class DateMap { 158 M5<Integer, Integer, Integer, Integer, Boolean> yearMonthDays = ChainedMap.of(new TreeMap<>(), new TreeMap(), new TreeMap(), new TreeMap(), 159 Boolean.class); 160 int current = 0; 161 add(Date d)162 void add(Date d) { 163 yearMonthDays.put(d.getYear() + 1900, d.getMonth() + 1, d.getDate(), current++, Boolean.TRUE); 164 } 165 166 static DateFormat monthFormat = new SimpleDateFormat("MMM"); 167 168 @Override toString()169 public String toString() { 170 StringBuilder result = new StringBuilder(); 171 int years = 0; 172 for (Entry<Integer, Map<Integer, Map<Integer, Map<Integer, Boolean>>>> yearMonthDay : yearMonthDays) { 173 if (years++ > 0) { 174 result.append("; "); 175 } 176 final int year = yearMonthDay.getKey(); 177 result.append(year); 178 result.append(": "); 179 int months = 0; 180 for (Entry<Integer, Map<Integer, Map<Integer, Boolean>>> monthDay : yearMonthDay.getValue().entrySet()) { 181 if (months++ > 0) { 182 result.append("; "); 183 } 184 final int month = monthDay.getKey(); 185 result.append(monthFormat.format(new Date(year - 1900, month - 1, 1))); 186 result.append(": "); 187 int days = 0; 188 for (Entry<Integer, Map<Integer, Boolean>> dayCount : monthDay.getValue().entrySet()) { 189 if (days++ > 0) { 190 result.append(", "); 191 } 192 final int day = dayCount.getKey(); 193 result.append(day); 194 final int count = dayCount.getValue().size(); 195 if (count > 1) { 196 result.append('('); 197 result.append(count); 198 result.append(")"); 199 } 200 } 201 } 202 } 203 return result.toString(); 204 } 205 } 206 207 static class Data { 208 final String key; 209 final List<Items> dataItems = new ArrayList<>(); 210 static Map<String, Data> map = new TreeMap<>(); 211 Data(String key)212 public Data(String key) { 213 this.key = key; 214 } 215 getKeys()216 public static Counter<String> getKeys() { 217 Counter<String> items = new Counter(); 218 for (Entry<String, Data> e : map.entrySet()) { 219 items.add(e.getKey(), e.getValue().dataItems.size()); 220 } 221 return items; 222 } 223 show(String regex)224 public static void show(String regex) { 225 Matcher m = Pattern.compile(regex).matcher(""); 226 227 for (Entry<String, Data> e : map.entrySet()) { 228 Data data = e.getValue(); 229 if (!m.reset(data.key).find()) { 230 continue; 231 } 232 Counter<User> counter = new Counter<>(); 233 Map<User, DateMap> dateMaps = new HashMap<>(); 234 for (Items item : data.dataItems) { 235 if (item.owner.org == organization) { 236 counter.add(item.owner, 1); 237 DateMap dateMap = dateMaps.get(item.owner); 238 if (dateMap == null) { 239 dateMaps.put(item.owner, dateMap = new DateMap()); 240 } 241 dateMap.add(item.date); 242 } 243 } 244 for (R2<Long, User> item : counter.getEntrySetSortedByCount(false, null)) { 245 final Long count = item.get0(); 246 final User user = item.get1(); 247 System.out.println("key: " + data.key + "; count: " + count + "; " + user + "\t" + dateMaps.get(user)); 248 } 249 } 250 } 251 252 @Override toString()253 public String toString() { 254 return key + "=" + CldrUtility.toString(dataItems); 255 } 256 add(ArrayList<String> items)257 public Items add(ArrayList<String> items) { 258 final Items items2 = Items.of(key, items); 259 if (items2 != null && items2.owner != null) { 260 dataItems.add(items2); 261 return items2; 262 } 263 return null; 264 } 265 parseLine(String line, long maxItems)266 static void parseLine(String line, long maxItems) { 267 Matcher m = INSERT.matcher(line); 268 String key; 269 int i; 270 if (m.lookingAt()) { 271 key = m.group(1); 272 i = m.end(); 273 } else { 274 throw new IllegalArgumentException(); 275 } 276 if (key.equals("FEEDBACK") || key.equals("sf_fora")) { // cf. private FeedBack.TABLE_FEEDBACK and public SurveyForum.DB_FORA 277 return; // old format 278 } 279 boolean inQuote = false; 280 boolean skipComma = true; 281 StringBuilder buffer = new StringBuilder(); 282 ArrayList<String> items = new ArrayList<>(); 283 Data current = map.get(key); 284 if (current == null) { 285 map.put(key, current = new Data(key)); 286 } 287 ArrayList<Data> rows = new ArrayList<>(); 288 289 while (i < line.length()) { 290 int cp = line.codePointAt(i); 291 i += Character.charCount(cp); 292 if (inQuote) { 293 switch (cp) { 294 case '\'': 295 inQuote = false; 296 break; 297 case '\\': 298 cp = line.codePointAt(i); 299 i += Character.charCount(cp); 300 // fall through 301 default: 302 buffer.appendCodePoint(cp); 303 break; 304 } 305 } else { 306 switch (cp) { 307 case '\'': 308 inQuote = true; 309 break; 310 case ',': 311 if (!skipComma) { 312 items.add(buffer.toString()); 313 buffer.setLength(0); 314 } 315 break; 316 case ' ': 317 case '\t': 318 case '\n': 319 case '(': 320 skipComma = false; 321 break; 322 case ')': 323 skipComma = true; 324 items.add(buffer.toString()); 325 buffer.setLength(0); 326 Items lastItem = current.add(items); 327 if (--maxItems > 0 && lastItem != null) { 328 if (verbose) System.out.println(key + "\t" + lastItem); 329 } 330 items = new ArrayList<>(); 331 break; 332 case '\\': 333 cp = line.codePointAt(i); 334 i += Character.charCount(cp); 335 // fall through 336 default: 337 buffer.appendCodePoint(cp); 338 break; 339 } 340 } 341 } 342 } 343 } 344 345 static class User { 346 final int id; 347 final String email; 348 final Level level; 349 final String name; 350 final Organization org; 351 final Set<String> locales; 352 User(XPathParts parts)353 public User(XPathParts parts) { 354 this.id = Integer.parseInt(parts.getAttributeValue(-1, "id")); 355 this.email = parts.getAttributeValue(-1, "email"); 356 this.level = Level.valueOf(parts.getAttributeValue(-1, "level")); 357 this.name = parts.getAttributeValue(-1, "name"); 358 this.org = Organization.fromString(parts.getAttributeValue(-1, "org")); 359 this.locales = ImmutableSet.copyOf(Arrays.asList(parts.getAttributeValue(-1, "locales").split("[, ]+"))); 360 } 361 362 @Override toString()363 public String toString() { 364 return "id: " + id 365 + "; email: " + email 366 + "; name: " + name 367 + "; level: " + level 368 + "; org: " + org 369 + "; locales: " + locales; 370 } 371 } 372 373 static class UserMap { 374 Map<Integer, User> map = new HashMap<>(); 375 UserMap(String filename)376 UserMap(String filename) { 377 List<Pair<String, String>> data = new ArrayList<>(); 378 XMLFileReader.loadPathValues(filename, data, false); 379 // <user id="1271" email="..." level="tc" name="..." org="adobe" locales="pt"/> 380 for (Pair<String, String> e : data) { 381 String path = e.getFirst(); 382 XPathParts parts = XPathParts.getFrozenInstance(path); 383 User user = new User(parts); 384 map.put(user.id, user); 385 } 386 } 387 get(String string)388 public User get(String string) { 389 return map.get(Integer.valueOf(string)); 390 } 391 } 392 } 393