1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util; 8 9 import java.util.ArrayList; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.Map; 14 import java.util.Random; 15 import java.util.Set; 16 17 import com.ibm.icu.text.UnicodeSet; 18 19 public class BNF { 20 private Map map = new HashMap(); 21 private Set variables = new HashSet(); 22 private Pick pick = null; 23 private Pick.Target target = null; 24 private Tokenizer t; 25 private Quoter quoter; 26 private Random random; 27 next()28 public String next() { 29 return target.next(); 30 } 31 getInternal()32 public String getInternal() { 33 return pick.getInternal(0, new HashSet()); 34 } 35 36 /* 37 + "weight = integer '%';" 38 + "range = '{' integer (',' integer?)? '}' weight*;" 39 + "quote = '@';" 40 + "star = '*' weight*;" 41 + "plus = '+' weight*;" 42 + "maybe = '?' weight?;" 43 + "quantifier = range | star | maybe | plus;" 44 + "core = string | unicodeSet | '(' alternation ')';" 45 + "sequence = (core quantifier*)+;" 46 + "alternation = sequence (weight? ('|' sequence weight?)+)?;" 47 + "rule = string '=' alternation;"; 48 49 50 * Match 0 or more times 51 + Match 1 or more times 52 ? Match 1 or 0 times 53 {n} Match exactly n times 54 {n,} Match at least n times 55 {n,m} Match at least n but not more than m times 56 57 58 59 */ 60 BNF(Random random, Quoter quoter)61 public BNF(Random random, Quoter quoter) { 62 this.random = random; 63 this.quoter = quoter; 64 t = new Tokenizer(); 65 } 66 addRules(String rules)67 public BNF addRules(String rules) { 68 t.setSource(rules); 69 while (addRule()) { 70 } 71 return this; // for chaining 72 } 73 complete()74 public BNF complete() { 75 // check that the rules match the variables, except for $root in rules 76 Set ruleSet = map.keySet(); 77 // add also 78 variables.add("$root"); 79 variables.addAll(t.getLookedUpItems()); 80 if (!ruleSet.equals(variables)) { 81 String msg = showDiff(variables, ruleSet); 82 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; 83 String temp = showDiff(ruleSet, variables); 84 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; 85 if (msg.length() == 0) 86 msg = temp; 87 else if (temp.length() != 0) { 88 msg = msg + "; " + temp; 89 } 90 error(msg); 91 } 92 93 if (!ruleSet.equals(variables)) { 94 String msg = showDiff(variables, ruleSet); 95 if (msg.length() != 0) msg = "Missing definitions for: " + msg; 96 String temp = showDiff(ruleSet, variables); 97 if (temp.length() != 0) temp = "Defined but not used: " + temp; 98 if (msg.length() == 0) 99 msg = temp; 100 else if (temp.length() != 0) { 101 msg = msg + "; " + temp; 102 } 103 error(msg); 104 } 105 106 // replace variables by definitions 107 Iterator it = ruleSet.iterator(); 108 while (it.hasNext()) { 109 String key = (String) it.next(); 110 Pick expression = (Pick) map.get(key); 111 Iterator it2 = ruleSet.iterator(); 112 if (false && key.equals("$crlf")) { 113 System.out.println("debug"); 114 } 115 while (it2.hasNext()) { 116 Object key2 = it2.next(); 117 if (key.equals(key2)) continue; 118 Pick expression2 = (Pick) map.get(key2); 119 expression2.replace(key, expression); 120 } 121 } 122 pick = (Pick) map.get("$root"); 123 target = Pick.Target.make(pick, random, quoter); 124 // TODO remove temp collections 125 return this; 126 } 127 showDiff(Set a, Set b)128 String showDiff(Set a, Set b) { 129 Set temp = new HashSet(); 130 temp.addAll(a); 131 temp.removeAll(b); 132 if (temp.size() == 0) return ""; 133 StringBuffer buffer = new StringBuffer(); 134 Iterator it = temp.iterator(); 135 while (it.hasNext()) { 136 if (buffer.length() != 0) buffer.append(", "); 137 buffer.append(it.next().toString()); 138 } 139 return buffer.toString(); 140 } 141 error(String msg)142 void error(String msg) { 143 throw new IllegalArgumentException(msg 144 + "\r\n" + t.toString()); 145 } 146 addRule()147 private boolean addRule() { 148 int type = t.next(); 149 //System.out.println(type + "t.getString " + t.getString()); 150 if (type == Tokenizer.DONE) { 151 return false; 152 } 153 if (type != Tokenizer.STRING) { 154 error("missing weight"); 155 } 156 String s = t.getString(); 157 if (s.length() == 0 || s.charAt(0) != '$') { 158 error("missing $ in variable"); 159 } 160 if (t.next() != '=') { 161 error("missing ="); 162 } 163 int startBody = t.index; 164 Pick rule = getAlternation(); 165 if (rule == null) error("missing expression"); 166 t.addSymbol(s, t.getSource(), startBody, t.index); 167 if (t.next() != ';') { 168 error("missing ;"); 169 } 170 return addPick(s, rule); 171 } 172 addPick(String s, Pick rule)173 protected boolean addPick(String s, Pick rule) { 174 Object temp = map.get(s); 175 if (temp != null) error("duplicate variable"); 176 if (rule.name == null) rule.name(s); 177 map.put(s, rule); 178 return true; 179 } 180 addSet(String variable, UnicodeSet set)181 public BNF addSet(String variable, UnicodeSet set) { 182 if (set != null) { 183 String body = set.toString(); 184 t.addSymbol(variable, body, 0, body.length()); 185 addPick(variable, Pick.codePoint(set)); 186 } 187 return this; 188 } 189 190 int maxRepeat = 99; 191 qualify(Pick item)192 Pick qualify(Pick item) { 193 int[] weights; 194 int type = t.next(); 195 switch (type) { 196 case '@': 197 return new Pick.Quote(item); 198 case '~': 199 return new Pick.Morph(item); 200 case '?': 201 int weight = getWeight(); 202 if (weight == NO_WEIGHT) weight = 50; 203 weights = new int[] { 100 - weight, weight }; 204 return Pick.repeat(0, 1, weights, item); 205 case '*': 206 weights = getWeights(); 207 return Pick.repeat(1, maxRepeat, weights, item); 208 case '+': 209 weights = getWeights(); 210 return Pick.repeat(1, maxRepeat, weights, item); 211 case '{': 212 if (t.next() != Tokenizer.NUMBER) error("missing number"); 213 int start = (int) t.getNumber(); 214 int end = start; 215 type = t.next(); 216 if (type == ',') { 217 end = maxRepeat; 218 type = t.next(); 219 if (type == Tokenizer.NUMBER) { 220 end = (int) t.getNumber(); 221 type = t.next(); 222 } 223 } 224 if (type != '}') error("missing }"); 225 weights = getWeights(); 226 return Pick.repeat(start, end, weights, item); 227 } 228 t.backup(); 229 return item; 230 } 231 getCore()232 Pick getCore() { 233 int token = t.next(); 234 if (token == Tokenizer.STRING) { 235 String s = t.getString(); 236 if (s.charAt(0) == '$') variables.add(s); 237 return Pick.string(s); 238 } 239 if (token == Tokenizer.UNICODESET) { 240 return Pick.codePoint(t.getUnicodeSet()); 241 } 242 if (token != '(') { 243 t.backup(); 244 return null; 245 } 246 Pick temp = getAlternation(); 247 token = t.next(); 248 if (token != ')') error("missing )"); 249 return temp; 250 } 251 getSequence()252 Pick getSequence() { 253 Pick.Sequence result = null; 254 Pick last = null; 255 while (true) { 256 Pick item = getCore(); 257 if (item == null) { 258 if (result != null) return result; 259 if (last != null) return last; 260 error("missing item in sequence"); 261 } 262 // qualify it as many times as possible 263 Pick oldItem; 264 do { 265 oldItem = item; 266 item = qualify(item); 267 } while (item != oldItem); 268 // add it in 269 if (last == null) { 270 last = item; 271 } else { 272 if (result == null) result = Pick.makeSequence().and2(last); 273 result = result.and2(item); 274 } 275 } 276 } 277 278 // for simplicity, we just use recursive descent getAlternation()279 Pick getAlternation() { 280 Pick.Alternation result = null; 281 Pick last = null; 282 int lastWeight = NO_WEIGHT; 283 while (true) { 284 Pick temp = getSequence(); 285 if (temp == null) error("empty alternation"); 286 int weight = getWeight(); 287 if (weight == NO_WEIGHT) weight = 1; 288 if (last == null) { 289 last = temp; 290 lastWeight = weight; 291 } else { 292 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); 293 result = result.or2(weight, temp); 294 } 295 int token = t.next(); 296 if (token != '|') { 297 t.backup(); 298 if (result != null) return result; 299 if (last != null) return last; 300 } 301 } 302 } 303 304 private static final int NO_WEIGHT = Integer.MIN_VALUE; 305 getWeight()306 int getWeight() { 307 int weight; 308 int token = t.next(); 309 if (token != Tokenizer.NUMBER) { 310 t.backup(); 311 return NO_WEIGHT; 312 } 313 weight = (int) t.getNumber(); 314 token = t.next(); 315 if (token != '%') error("missing %"); 316 return weight; 317 } 318 getWeights()319 int[] getWeights() { 320 ArrayList list = new ArrayList(); 321 while (true) { 322 int weight = getWeight(); 323 if (weight == NO_WEIGHT) break; 324 list.add(new Integer(weight)); 325 } 326 if (list.size() == 0) return null; 327 int[] result = new int[list.size()]; 328 for (int i = 0; i < list.size(); ++i) { 329 result[i] = ((Integer) list.get(i)).intValue(); 330 } 331 return result; 332 } 333 getMaxRepeat()334 public int getMaxRepeat() { 335 return maxRepeat; 336 } 337 setMaxRepeat(int maxRepeat)338 public BNF setMaxRepeat(int maxRepeat) { 339 this.maxRepeat = maxRepeat; 340 return this; 341 } 342 } 343