1 /* 2 ******************************************************************************* 3 * Copyright (C) 2002-2012, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util; 8 9 import java.util.ArrayList; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.Iterator; 13 import java.util.Map; 14 import java.util.Random; 15 import java.util.Set; 16 17 import com.ibm.icu.text.UnicodeSet; 18 19 public class BNF { 20 private Map map = new HashMap(); 21 private Set variables = new HashSet(); 22 private Pick pick = null; 23 private Pick.Target target = null; 24 private Tokenizer t; 25 private Quoter quoter; 26 private Random random; 27 next()28 public String next() { 29 return target.next(); 30 } 31 getInternal()32 public String getInternal() { 33 return pick.getInternal(0, new HashSet()); 34 } 35 36 /* 37 + "weight = integer '%';" 38 + "range = '{' integer (',' integer?)? '}' weight*;" 39 + "quote = '@';" 40 + "star = '*' weight*;" 41 + "plus = '+' weight*;" 42 + "maybe = '?' weight?;" 43 + "quantifier = range | star | maybe | plus;" 44 + "core = string | unicodeSet | '(' alternation ')';" 45 + "sequence = (core quantifier*)+;" 46 + "alternation = sequence (weight? ('|' sequence weight?)+)?;" 47 + "rule = string '=' alternation;"; 48 49 50 * Match 0 or more times 51 + Match 1 or more times 52 ? Match 1 or 0 times 53 {n} Match exactly n times 54 {n,} Match at least n times 55 {n,m} Match at least n but not more than m times 56 57 58 59 */ 60 BNF(Random random, Quoter quoter)61 public BNF(Random random, Quoter quoter) { 62 this.random = random; 63 this.quoter = quoter; 64 t = new Tokenizer(); 65 } 66 addRules(String rules)67 public BNF addRules(String rules) { 68 t.setSource(rules); 69 while (addRule()) { 70 } 71 return this; // for chaining 72 } 73 complete()74 public BNF complete() { 75 // check that the rules match the variables, except for $root in rules 76 Set ruleSet = map.keySet(); 77 // add also 78 variables.add("$root"); 79 variables.addAll(t.getLookedUpItems()); 80 if (!ruleSet.equals(variables)) { 81 String msg = showDiff(variables, ruleSet); 82 if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg; 83 String temp = showDiff(ruleSet, variables); 84 if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp; 85 if (msg.length() == 0) 86 msg = temp; 87 else if (temp.length() != 0) { 88 msg = msg + "; " + temp; 89 } 90 error(msg); 91 } 92 93 if (!ruleSet.equals(variables)) { 94 String msg = showDiff(variables, ruleSet); 95 if (msg.length() != 0) msg = "Missing definitions for: " + msg; 96 String temp = showDiff(ruleSet, variables); 97 if (temp.length() != 0) temp = "Defined but not used: " + temp; 98 if (msg.length() == 0) 99 msg = temp; 100 else if (temp.length() != 0) { 101 msg = msg + "; " + temp; 102 } 103 error(msg); 104 } 105 106 // replace variables by definitions 107 Iterator it = ruleSet.iterator(); 108 while (it.hasNext()) { 109 String key = (String) it.next(); 110 Pick expression = (Pick) map.get(key); 111 Iterator it2 = ruleSet.iterator(); 112 if (false && key.equals("$crlf")) { 113 System.out.println("debug"); 114 } 115 while (it2.hasNext()) { 116 Object key2 = it2.next(); 117 if (key.equals(key2)) continue; 118 Pick expression2 = (Pick) map.get(key2); 119 expression2.replace(key, expression); 120 } 121 } 122 pick = (Pick) map.get("$root"); 123 target = Pick.Target.make(pick, random, quoter); 124 // TODO remove temp collections 125 return this; 126 } 127 showDiff(Set a, Set b)128 String showDiff(Set a, Set b) { 129 Set temp = new HashSet(); 130 temp.addAll(a); 131 temp.removeAll(b); 132 if (temp.size() == 0) return ""; 133 StringBuffer buffer = new StringBuffer(); 134 Iterator it = temp.iterator(); 135 while (it.hasNext()) { 136 if (buffer.length() != 0) buffer.append(", "); 137 buffer.append(it.next().toString()); 138 } 139 return buffer.toString(); 140 } 141 error(String msg)142 void error(String msg) { 143 throw new IllegalArgumentException(msg 144 + "\r\n" + t.toString()); 145 } 146 addRule()147 private boolean addRule() { 148 int type = t.next(); 149 if (type == Tokenizer.DONE) return false; 150 if (type != Tokenizer.STRING) error("missing weight"); 151 String s = t.getString(); 152 if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable"); 153 if (t.next() != '=') error("missing ="); 154 int startBody = t.index; 155 Pick rule = getAlternation(); 156 if (rule == null) error("missing expression"); 157 t.addSymbol(s, t.getSource(), startBody, t.index); 158 if (t.next() != ';') error("missing ;"); 159 return addPick(s, rule); 160 } 161 addPick(String s, Pick rule)162 protected boolean addPick(String s, Pick rule) { 163 Object temp = map.get(s); 164 if (temp != null) error("duplicate variable"); 165 if (rule.name == null) rule.name(s); 166 map.put(s, rule); 167 return true; 168 } 169 addSet(String variable, UnicodeSet set)170 public BNF addSet(String variable, UnicodeSet set) { 171 if (set != null) { 172 String body = set.toString(); 173 t.addSymbol(variable, body, 0, body.length()); 174 addPick(variable, Pick.codePoint(set)); 175 } 176 return this; 177 } 178 179 int maxRepeat = 99; 180 qualify(Pick item)181 Pick qualify(Pick item) { 182 int[] weights; 183 int type = t.next(); 184 switch (type) { 185 case '@': 186 return new Pick.Quote(item); 187 case '~': 188 return new Pick.Morph(item); 189 case '?': 190 int weight = getWeight(); 191 if (weight == NO_WEIGHT) weight = 50; 192 weights = new int[] { 100 - weight, weight }; 193 return Pick.repeat(0, 1, weights, item); 194 case '*': 195 weights = getWeights(); 196 return Pick.repeat(1, maxRepeat, weights, item); 197 case '+': 198 weights = getWeights(); 199 return Pick.repeat(1, maxRepeat, weights, item); 200 case '{': 201 if (t.next() != Tokenizer.NUMBER) error("missing number"); 202 int start = (int) t.getNumber(); 203 int end = start; 204 type = t.next(); 205 if (type == ',') { 206 end = maxRepeat; 207 type = t.next(); 208 if (type == Tokenizer.NUMBER) { 209 end = (int) t.getNumber(); 210 type = t.next(); 211 } 212 } 213 if (type != '}') error("missing }"); 214 weights = getWeights(); 215 return Pick.repeat(start, end, weights, item); 216 } 217 t.backup(); 218 return item; 219 } 220 getCore()221 Pick getCore() { 222 int token = t.next(); 223 if (token == Tokenizer.STRING) { 224 String s = t.getString(); 225 if (s.charAt(0) == '$') variables.add(s); 226 return Pick.string(s); 227 } 228 if (token == Tokenizer.UNICODESET) { 229 return Pick.codePoint(t.getUnicodeSet()); 230 } 231 if (token != '(') { 232 t.backup(); 233 return null; 234 } 235 Pick temp = getAlternation(); 236 token = t.next(); 237 if (token != ')') error("missing )"); 238 return temp; 239 } 240 getSequence()241 Pick getSequence() { 242 Pick.Sequence result = null; 243 Pick last = null; 244 while (true) { 245 Pick item = getCore(); 246 if (item == null) { 247 if (result != null) return result; 248 if (last != null) return last; 249 error("missing item"); 250 } 251 // qualify it as many times as possible 252 Pick oldItem; 253 do { 254 oldItem = item; 255 item = qualify(item); 256 } while (item != oldItem); 257 // add it in 258 if (last == null) { 259 last = item; 260 } else { 261 if (result == null) result = Pick.makeSequence().and2(last); 262 result = result.and2(item); 263 } 264 } 265 } 266 267 // for simplicity, we just use recursive descent getAlternation()268 Pick getAlternation() { 269 Pick.Alternation result = null; 270 Pick last = null; 271 int lastWeight = NO_WEIGHT; 272 while (true) { 273 Pick temp = getSequence(); 274 if (temp == null) error("empty alternation"); 275 int weight = getWeight(); 276 if (weight == NO_WEIGHT) weight = 1; 277 if (last == null) { 278 last = temp; 279 lastWeight = weight; 280 } else { 281 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last); 282 result = result.or2(weight, temp); 283 } 284 int token = t.next(); 285 if (token != '|') { 286 t.backup(); 287 if (result != null) return result; 288 if (last != null) return last; 289 } 290 } 291 } 292 293 private static final int NO_WEIGHT = Integer.MIN_VALUE; 294 getWeight()295 int getWeight() { 296 int weight; 297 int token = t.next(); 298 if (token != Tokenizer.NUMBER) { 299 t.backup(); 300 return NO_WEIGHT; 301 } 302 weight = (int) t.getNumber(); 303 token = t.next(); 304 if (token != '%') error("missing %"); 305 return weight; 306 } 307 getWeights()308 int[] getWeights() { 309 ArrayList list = new ArrayList(); 310 while (true) { 311 int weight = getWeight(); 312 if (weight == NO_WEIGHT) break; 313 list.add(new Integer(weight)); 314 } 315 if (list.size() == 0) return null; 316 int[] result = new int[list.size()]; 317 for (int i = 0; i < list.size(); ++i) { 318 result[i] = ((Integer) list.get(i)).intValue(); 319 } 320 return result; 321 } 322 getMaxRepeat()323 public int getMaxRepeat() { 324 return maxRepeat; 325 } 326 setMaxRepeat(int maxRepeat)327 public BNF setMaxRepeat(int maxRepeat) { 328 this.maxRepeat = maxRepeat; 329 return this; 330 } 331 } 332