1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util; 8 9 import com.ibm.icu.text.UnicodeSet; 10 import java.util.ArrayList; 11 import java.util.HashMap; 12 import java.util.HashSet; 13 import java.util.Iterator; 14 import java.util.Map; 15 import java.util.Random; 16 import java.util.Set; 17 18 public class BNF { 19 private Map map = new HashMap(); 20 private Set variables = new HashSet(); 21 private Pick pick = null; 22 private Pick.Target target = null; 23 private Tokenizer t; 24 private Quoter quoter; 25 private Random random; 26 next()27 public String next() { 28 return target.next(); 29 } 30 getInternal()31 public String getInternal() { 32 return pick.getInternal(0, new HashSet()); 33 } 34 35 /* 36 + "weight = integer '%';" 37 + "range = '{' integer (',' integer?)? '}' weight*;" 38 + "quote = '@';" 39 + "star = '*' weight*;" 40 + "plus = '+' weight*;" 41 + "maybe = '?' weight?;" 42 + "quantifier = range | star | maybe | plus;" 43 + "core = string | unicodeSet | '(' alternation ')';" 44 + "sequence = (core quantifier*)+;" 45 + "alternation = sequence (weight? ('|' sequence weight?)+)?;" 46 + "rule = string '=' alternation;"; 47 48 49 * Match 0 or more times 50 + Match 1 or more times 51 ? Match 1 or 0 times 52 {n} Match exactly n times 53 {n,} Match at least n times 54 {n,m} Match at least n but not more than m times 55 56 57 58 */ 59 BNF(Random random, Quoter quoter)60 public BNF(Random random, Quoter quoter) { 61 this.random = random; 62 this.quoter = quoter; 63 t = new Tokenizer(); 64 } 65 addRules(String rules)66 public BNF addRules(String rules) { 67 t.setSource(rules); 68 while (addRule()) {} 69 return this; // for chaining 70 } 71 complete()72 public BNF complete() { 73 // check that the rules match the variables, except for $root in rules 74 Set ruleSet = map.keySet(); 75 // add also 76 variables.add("$root"); 77 variables.addAll(t.getLookedUpItems()); 78 if (!ruleSet.equals(variables)) { 79 String msg = showDiff(variables, ruleSet); 80 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; 81 String temp = showDiff(ruleSet, variables); 82 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; 83 if (msg.length() == 0) msg = temp; 84 else if (temp.length() != 0) { 85 msg = msg + "; " + temp; 86 } 87 error(msg); 88 } 89 90 if (!ruleSet.equals(variables)) { 91 String msg = showDiff(variables, ruleSet); 92 if (msg.length() != 0) msg = "Missing definitions for: " + msg; 93 String temp = showDiff(ruleSet, variables); 94 if (temp.length() != 0) temp = "Defined but not used: " + temp; 95 if (msg.length() == 0) msg = temp; 96 else if (temp.length() != 0) { 97 msg = msg + "; " + temp; 98 } 99 error(msg); 100 } 101 102 // replace variables by definitions 103 Iterator it = ruleSet.iterator(); 104 while (it.hasNext()) { 105 String key = (String) it.next(); 106 Pick expression = (Pick) map.get(key); 107 Iterator it2 = ruleSet.iterator(); 108 if (false && key.equals("$crlf")) { 109 System.out.println("debug"); 110 } 111 while (it2.hasNext()) { 112 Object key2 = it2.next(); 113 if (key.equals(key2)) continue; 114 Pick expression2 = (Pick) map.get(key2); 115 expression2.replace(key, expression); 116 } 117 } 118 pick = (Pick) map.get("$root"); 119 target = Pick.Target.make(pick, random, quoter); 120 // TODO remove temp collections 121 return this; 122 } 123 showDiff(Set a, Set b)124 String showDiff(Set a, Set b) { 125 Set temp = new HashSet(); 126 temp.addAll(a); 127 temp.removeAll(b); 128 if (temp.size() == 0) return ""; 129 StringBuffer buffer = new StringBuffer(); 130 Iterator it = temp.iterator(); 131 while (it.hasNext()) { 132 if (buffer.length() != 0) buffer.append(", "); 133 buffer.append(it.next().toString()); 134 } 135 return buffer.toString(); 136 } 137 error(String msg)138 void error(String msg) { 139 throw new IllegalArgumentException(msg + "\r\n" + t.toString()); 140 } 141 addRule()142 private boolean addRule() { 143 int type = t.next(); 144 // System.out.println(type + "t.getString " + t.getString()); 145 if (type == Tokenizer.DONE) { 146 return false; 147 } 148 if (type != Tokenizer.STRING) { 149 error("missing weight"); 150 } 151 String s = t.getString(); 152 if (s.length() == 0 || s.charAt(0) != '$') { 153 error("missing $ in variable"); 154 } 155 if (t.next() != '=') { 156 error("missing ="); 157 } 158 int startBody = t.index; 159 Pick rule = getAlternation(); 160 if (rule == null) error("missing expression"); 161 t.addSymbol(s, t.getSource(), startBody, t.index); 162 if (t.next() != ';') { 163 error("missing ;"); 164 } 165 return addPick(s, rule); 166 } 167 addPick(String s, Pick rule)168 protected boolean addPick(String s, Pick rule) { 169 Object temp = map.get(s); 170 if (temp != null) error("duplicate variable"); 171 if (rule.name == null) rule.name(s); 172 map.put(s, rule); 173 return true; 174 } 175 addSet(String variable, UnicodeSet set)176 public BNF addSet(String variable, UnicodeSet set) { 177 if (set != null) { 178 String body = set.toString(); 179 t.addSymbol(variable, body, 0, body.length()); 180 addPick(variable, Pick.codePoint(set)); 181 } 182 return this; 183 } 184 185 int maxRepeat = 99; 186 qualify(Pick item)187 Pick qualify(Pick item) { 188 int[] weights; 189 int type = t.next(); 190 switch (type) { 191 case '@': 192 return new Pick.Quote(item); 193 case '~': 194 return new Pick.Morph(item); 195 case '?': 196 int weight = getWeight(); 197 if (weight == NO_WEIGHT) weight = 50; 198 weights = new int[] {100 - weight, weight}; 199 return Pick.repeat(0, 1, weights, item); 200 case '*': 201 weights = getWeights(); 202 return Pick.repeat(1, maxRepeat, weights, item); 203 case '+': 204 weights = getWeights(); 205 return Pick.repeat(1, maxRepeat, weights, item); 206 case '{': 207 if (t.next() != Tokenizer.NUMBER) error("missing number"); 208 int start = (int) t.getNumber(); 209 int end = start; 210 type = t.next(); 211 if (type == ',') { 212 end = maxRepeat; 213 type = t.next(); 214 if (type == Tokenizer.NUMBER) { 215 end = (int) t.getNumber(); 216 type = t.next(); 217 } 218 } 219 if (type != '}') error("missing }"); 220 weights = getWeights(); 221 return Pick.repeat(start, end, weights, item); 222 } 223 t.backup(); 224 return item; 225 } 226 getCore()227 Pick getCore() { 228 int token = t.next(); 229 if (token == Tokenizer.STRING) { 230 String s = t.getString(); 231 if (s.charAt(0) == '$') variables.add(s); 232 return Pick.string(s); 233 } 234 if (token == Tokenizer.UNICODESET) { 235 return Pick.codePoint(t.getUnicodeSet()); 236 } 237 if (token != '(') { 238 t.backup(); 239 return null; 240 } 241 Pick temp = getAlternation(); 242 token = t.next(); 243 if (token != ')') error("missing )"); 244 return temp; 245 } 246 getSequence()247 Pick getSequence() { 248 Pick.Sequence result = null; 249 Pick last = null; 250 while (true) { 251 Pick item = getCore(); 252 if (item == null) { 253 if (result != null) return result; 254 if (last != null) return last; 255 error("missing item in sequence"); 256 } 257 // qualify it as many times as possible 258 Pick oldItem; 259 do { 260 oldItem = item; 261 item = qualify(item); 262 } while (item != oldItem); 263 // add it in 264 if (last == null) { 265 last = item; 266 } else { 267 if (result == null) result = Pick.makeSequence().and2(last); 268 result = result.and2(item); 269 } 270 } 271 } 272 273 // for simplicity, we just use recursive descent getAlternation()274 Pick getAlternation() { 275 Pick.Alternation result = null; 276 Pick last = null; 277 int lastWeight = NO_WEIGHT; 278 while (true) { 279 Pick temp = getSequence(); 280 if (temp == null) error("empty alternation"); 281 int weight = getWeight(); 282 if (weight == NO_WEIGHT) weight = 1; 283 if (last == null) { 284 last = temp; 285 lastWeight = weight; 286 } else { 287 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); 288 result = result.or2(weight, temp); 289 } 290 int token = t.next(); 291 if (token != '|') { 292 t.backup(); 293 if (result != null) return result; 294 if (last != null) return last; 295 } 296 } 297 } 298 299 private static final int NO_WEIGHT = Integer.MIN_VALUE; 300 getWeight()301 int getWeight() { 302 int weight; 303 int token = t.next(); 304 if (token != Tokenizer.NUMBER) { 305 t.backup(); 306 return NO_WEIGHT; 307 } 308 weight = (int) t.getNumber(); 309 token = t.next(); 310 if (token != '%') error("missing %"); 311 return weight; 312 } 313 getWeights()314 int[] getWeights() { 315 ArrayList<Integer> list = new ArrayList(); 316 while (true) { 317 int weight = getWeight(); 318 if (weight == NO_WEIGHT) break; 319 list.add(weight); 320 } 321 if (list.isEmpty()) return null; 322 int[] result = new int[list.size()]; 323 for (int i = 0; i < list.size(); ++i) { 324 result[i] = list.get(i).intValue(); 325 } 326 return result; 327 } 328 getMaxRepeat()329 public int getMaxRepeat() { 330 return maxRepeat; 331 } 332 setMaxRepeat(int maxRepeat)333 public BNF setMaxRepeat(int maxRepeat) { 334 this.maxRepeat = maxRepeat; 335 return this; 336 } 337 } 338