• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util;
8 
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.Map;
14 import java.util.Random;
15 import java.util.Set;
16 
17 import com.ibm.icu.text.UnicodeSet;
18 
19 public class BNF {
20     private Map map = new HashMap();
21     private Set variables = new HashSet();
22     private Pick pick = null;
23     private Pick.Target target = null;
24     private Tokenizer t;
25     private Quoter quoter;
26     private Random random;
27 
next()28     public String next() {
29         return target.next();
30     }
31 
getInternal()32     public String getInternal() {
33         return pick.getInternal(0, new HashSet());
34     }
35 
36     /*
37     + "weight = integer '%';"
38     + "range = '{' integer (',' integer?)? '}' weight*;"
39     + "quote = '@';"
40     + "star = '*' weight*;"
41     + "plus = '+' weight*;"
42     + "maybe = '?' weight?;"
43     + "quantifier = range | star | maybe | plus;"
44     + "core = string | unicodeSet | '(' alternation ')';"
45     + "sequence = (core quantifier*)+;"
46     + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
47     + "rule = string '=' alternation;";
48 
49 
50     *      Match 0 or more times
51     +      Match 1 or more times
52     ?      Match 1 or 0 times
53     {n}    Match exactly n times
54     {n,}   Match at least n times
55     {n,m}  Match at least n but not more than m times
56 
57 
58 
59     */
60 
BNF(Random random, Quoter quoter)61     public BNF(Random random, Quoter quoter) {
62         this.random = random;
63         this.quoter = quoter;
64         t = new Tokenizer();
65     }
66 
addRules(String rules)67     public BNF addRules(String rules) {
68         t.setSource(rules);
69         while (addRule()) {
70         }
71         return this; // for chaining
72     }
73 
complete()74     public BNF complete() {
75         // check that the rules match the variables, except for $root in rules
76         Set ruleSet = map.keySet();
77         // add also
78         variables.add("$root");
79         variables.addAll(t.getLookedUpItems());
80         if (!ruleSet.equals(variables)) {
81             String msg = showDiff(variables, ruleSet);
82             if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
83             String temp = showDiff(ruleSet, variables);
84             if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
85             if (msg.length() == 0)
86                 msg = temp;
87             else if (temp.length() != 0) {
88                 msg = msg + "; " + temp;
89             }
90             error(msg);
91         }
92 
93         if (!ruleSet.equals(variables)) {
94             String msg = showDiff(variables, ruleSet);
95             if (msg.length() != 0) msg = "Missing definitions for: " + msg;
96             String temp = showDiff(ruleSet, variables);
97             if (temp.length() != 0) temp = "Defined but not used: " + temp;
98             if (msg.length() == 0)
99                 msg = temp;
100             else if (temp.length() != 0) {
101                 msg = msg + "; " + temp;
102             }
103             error(msg);
104         }
105 
106         // replace variables by definitions
107         Iterator it = ruleSet.iterator();
108         while (it.hasNext()) {
109             String key = (String) it.next();
110             Pick expression = (Pick) map.get(key);
111             Iterator it2 = ruleSet.iterator();
112             if (false && key.equals("$crlf")) {
113                 System.out.println("debug");
114             }
115             while (it2.hasNext()) {
116                 Object key2 = it2.next();
117                 if (key.equals(key2)) continue;
118                 Pick expression2 = (Pick) map.get(key2);
119                 expression2.replace(key, expression);
120             }
121         }
122         pick = (Pick) map.get("$root");
123         target = Pick.Target.make(pick, random, quoter);
124         // TODO remove temp collections
125         return this;
126     }
127 
showDiff(Set a, Set b)128     String showDiff(Set a, Set b) {
129         Set temp = new HashSet();
130         temp.addAll(a);
131         temp.removeAll(b);
132         if (temp.size() == 0) return "";
133         StringBuffer buffer = new StringBuffer();
134         Iterator it = temp.iterator();
135         while (it.hasNext()) {
136             if (buffer.length() != 0) buffer.append(", ");
137             buffer.append(it.next().toString());
138         }
139         return buffer.toString();
140     }
141 
error(String msg)142     void error(String msg) {
143         throw new IllegalArgumentException(msg
144             + "\r\n" + t.toString());
145     }
146 
addRule()147     private boolean addRule() {
148         int type = t.next();
149         if (type == Tokenizer.DONE) return false;
150         if (type != Tokenizer.STRING) error("missing weight");
151         String s = t.getString();
152         if (s.length() == 0 || s.charAt(0) != '$') error("missing $ in variable");
153         if (t.next() != '=') error("missing =");
154         int startBody = t.index;
155         Pick rule = getAlternation();
156         if (rule == null) error("missing expression");
157         t.addSymbol(s, t.getSource(), startBody, t.index);
158         if (t.next() != ';') error("missing ;");
159         return addPick(s, rule);
160     }
161 
addPick(String s, Pick rule)162     protected boolean addPick(String s, Pick rule) {
163         Object temp = map.get(s);
164         if (temp != null) error("duplicate variable");
165         if (rule.name == null) rule.name(s);
166         map.put(s, rule);
167         return true;
168     }
169 
addSet(String variable, UnicodeSet set)170     public BNF addSet(String variable, UnicodeSet set) {
171         if (set != null) {
172             String body = set.toString();
173             t.addSymbol(variable, body, 0, body.length());
174             addPick(variable, Pick.codePoint(set));
175         }
176         return this;
177     }
178 
179     int maxRepeat = 99;
180 
qualify(Pick item)181     Pick qualify(Pick item) {
182         int[] weights;
183         int type = t.next();
184         switch (type) {
185         case '@':
186             return new Pick.Quote(item);
187         case '~':
188             return new Pick.Morph(item);
189         case '?':
190             int weight = getWeight();
191             if (weight == NO_WEIGHT) weight = 50;
192             weights = new int[] { 100 - weight, weight };
193             return Pick.repeat(0, 1, weights, item);
194         case '*':
195             weights = getWeights();
196             return Pick.repeat(1, maxRepeat, weights, item);
197         case '+':
198             weights = getWeights();
199             return Pick.repeat(1, maxRepeat, weights, item);
200         case '{':
201             if (t.next() != Tokenizer.NUMBER) error("missing number");
202             int start = (int) t.getNumber();
203             int end = start;
204             type = t.next();
205             if (type == ',') {
206                 end = maxRepeat;
207                 type = t.next();
208                 if (type == Tokenizer.NUMBER) {
209                     end = (int) t.getNumber();
210                     type = t.next();
211                 }
212             }
213             if (type != '}') error("missing }");
214             weights = getWeights();
215             return Pick.repeat(start, end, weights, item);
216         }
217         t.backup();
218         return item;
219     }
220 
getCore()221     Pick getCore() {
222         int token = t.next();
223         if (token == Tokenizer.STRING) {
224             String s = t.getString();
225             if (s.charAt(0) == '$') variables.add(s);
226             return Pick.string(s);
227         }
228         if (token == Tokenizer.UNICODESET) {
229             return Pick.codePoint(t.getUnicodeSet());
230         }
231         if (token != '(') {
232             t.backup();
233             return null;
234         }
235         Pick temp = getAlternation();
236         token = t.next();
237         if (token != ')') error("missing )");
238         return temp;
239     }
240 
getSequence()241     Pick getSequence() {
242         Pick.Sequence result = null;
243         Pick last = null;
244         while (true) {
245             Pick item = getCore();
246             if (item == null) {
247                 if (result != null) return result;
248                 if (last != null) return last;
249                 error("missing item");
250             }
251             // qualify it as many times as possible
252             Pick oldItem;
253             do {
254                 oldItem = item;
255                 item = qualify(item);
256             } while (item != oldItem);
257             // add it in
258             if (last == null) {
259                 last = item;
260             } else {
261                 if (result == null) result = Pick.makeSequence().and2(last);
262                 result = result.and2(item);
263             }
264         }
265     }
266 
267     // for simplicity, we just use recursive descent
getAlternation()268     Pick getAlternation() {
269         Pick.Alternation result = null;
270         Pick last = null;
271         int lastWeight = NO_WEIGHT;
272         while (true) {
273             Pick temp = getSequence();
274             if (temp == null) error("empty alternation");
275             int weight = getWeight();
276             if (weight == NO_WEIGHT) weight = 1;
277             if (last == null) {
278                 last = temp;
279                 lastWeight = weight;
280             } else {
281                 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
282                 result = result.or2(weight, temp);
283             }
284             int token = t.next();
285             if (token != '|') {
286                 t.backup();
287                 if (result != null) return result;
288                 if (last != null) return last;
289             }
290         }
291     }
292 
293     private static final int NO_WEIGHT = Integer.MIN_VALUE;
294 
getWeight()295     int getWeight() {
296         int weight;
297         int token = t.next();
298         if (token != Tokenizer.NUMBER) {
299             t.backup();
300             return NO_WEIGHT;
301         }
302         weight = (int) t.getNumber();
303         token = t.next();
304         if (token != '%') error("missing %");
305         return weight;
306     }
307 
getWeights()308     int[] getWeights() {
309         ArrayList list = new ArrayList();
310         while (true) {
311             int weight = getWeight();
312             if (weight == NO_WEIGHT) break;
313             list.add(new Integer(weight));
314         }
315         if (list.size() == 0) return null;
316         int[] result = new int[list.size()];
317         for (int i = 0; i < list.size(); ++i) {
318             result[i] = ((Integer) list.get(i)).intValue();
319         }
320         return result;
321     }
322 
getMaxRepeat()323     public int getMaxRepeat() {
324         return maxRepeat;
325     }
326 
setMaxRepeat(int maxRepeat)327     public BNF setMaxRepeat(int maxRepeat) {
328         this.maxRepeat = maxRepeat;
329         return this;
330     }
331 }
332