• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util;
8 
9 import java.util.ArrayList;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.Map;
14 import java.util.Random;
15 import java.util.Set;
16 
17 import com.ibm.icu.text.UnicodeSet;
18 
19 public class BNF {
20     private Map map = new HashMap();
21     private Set variables = new HashSet();
22     private Pick pick = null;
23     private Pick.Target target = null;
24     private Tokenizer t;
25     private Quoter quoter;
26     private Random random;
27 
next()28     public String next() {
29         return target.next();
30     }
31 
getInternal()32     public String getInternal() {
33         return pick.getInternal(0, new HashSet());
34     }
35 
36     /*
37     + "weight = integer '%';"
38     + "range = '{' integer (',' integer?)? '}' weight*;"
39     + "quote = '@';"
40     + "star = '*' weight*;"
41     + "plus = '+' weight*;"
42     + "maybe = '?' weight?;"
43     + "quantifier = range | star | maybe | plus;"
44     + "core = string | unicodeSet | '(' alternation ')';"
45     + "sequence = (core quantifier*)+;"
46     + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
47     + "rule = string '=' alternation;";
48 
49 
50     *      Match 0 or more times
51     +      Match 1 or more times
52     ?      Match 1 or 0 times
53     {n}    Match exactly n times
54     {n,}   Match at least n times
55     {n,m}  Match at least n but not more than m times
56 
57 
58 
59     */
60 
BNF(Random random, Quoter quoter)61     public BNF(Random random, Quoter quoter) {
62         this.random = random;
63         this.quoter = quoter;
64         t = new Tokenizer();
65     }
66 
addRules(String rules)67     public BNF addRules(String rules) {
68         t.setSource(rules);
69         while (addRule()) {
70         }
71         return this; // for chaining
72     }
73 
complete()74     public BNF complete() {
75         // check that the rules match the variables, except for $root in rules
76         Set ruleSet = map.keySet();
77         // add also
78         variables.add("$root");
79         variables.addAll(t.getLookedUpItems());
80         if (!ruleSet.equals(variables)) {
81             String msg = showDiff(variables, ruleSet);
82             if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
83             String temp = showDiff(ruleSet, variables);
84             if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
85             if (msg.length() == 0)
86                 msg = temp;
87             else if (temp.length() != 0) {
88                 msg = msg + "; " + temp;
89             }
90             error(msg);
91         }
92 
93         if (!ruleSet.equals(variables)) {
94             String msg = showDiff(variables, ruleSet);
95             if (msg.length() != 0) msg = "Missing definitions for: " + msg;
96             String temp = showDiff(ruleSet, variables);
97             if (temp.length() != 0) temp = "Defined but not used: " + temp;
98             if (msg.length() == 0)
99                 msg = temp;
100             else if (temp.length() != 0) {
101                 msg = msg + "; " + temp;
102             }
103             error(msg);
104         }
105 
106         // replace variables by definitions
107         Iterator it = ruleSet.iterator();
108         while (it.hasNext()) {
109             String key = (String) it.next();
110             Pick expression = (Pick) map.get(key);
111             Iterator it2 = ruleSet.iterator();
112             if (false && key.equals("$crlf")) {
113                 System.out.println("debug");
114             }
115             while (it2.hasNext()) {
116                 Object key2 = it2.next();
117                 if (key.equals(key2)) continue;
118                 Pick expression2 = (Pick) map.get(key2);
119                 expression2.replace(key, expression);
120             }
121         }
122         pick = (Pick) map.get("$root");
123         target = Pick.Target.make(pick, random, quoter);
124         // TODO remove temp collections
125         return this;
126     }
127 
showDiff(Set a, Set b)128     String showDiff(Set a, Set b) {
129         Set temp = new HashSet();
130         temp.addAll(a);
131         temp.removeAll(b);
132         if (temp.size() == 0) return "";
133         StringBuffer buffer = new StringBuffer();
134         Iterator it = temp.iterator();
135         while (it.hasNext()) {
136             if (buffer.length() != 0) buffer.append(", ");
137             buffer.append(it.next().toString());
138         }
139         return buffer.toString();
140     }
141 
error(String msg)142     void error(String msg) {
143         throw new IllegalArgumentException(msg
144             + "\r\n" + t.toString());
145     }
146 
addRule()147     private boolean addRule() {
148         int type = t.next();
149         //System.out.println(type + "t.getString " + t.getString());
150         if (type == Tokenizer.DONE) {
151             return false;
152         }
153         if (type != Tokenizer.STRING) {
154             error("missing weight");
155         }
156         String s = t.getString();
157         if (s.length() == 0 || s.charAt(0) != '$') {
158             error("missing $ in variable");
159         }
160         if (t.next() != '=') {
161             error("missing =");
162         }
163         int startBody = t.index;
164         Pick rule = getAlternation();
165         if (rule == null) error("missing expression");
166         t.addSymbol(s, t.getSource(), startBody, t.index);
167         if (t.next() != ';') {
168             error("missing ;");
169         }
170         return addPick(s, rule);
171     }
172 
addPick(String s, Pick rule)173     protected boolean addPick(String s, Pick rule) {
174         Object temp = map.get(s);
175         if (temp != null) error("duplicate variable");
176         if (rule.name == null) rule.name(s);
177         map.put(s, rule);
178         return true;
179     }
180 
addSet(String variable, UnicodeSet set)181     public BNF addSet(String variable, UnicodeSet set) {
182         if (set != null) {
183             String body = set.toString();
184             t.addSymbol(variable, body, 0, body.length());
185             addPick(variable, Pick.codePoint(set));
186         }
187         return this;
188     }
189 
190     int maxRepeat = 99;
191 
qualify(Pick item)192     Pick qualify(Pick item) {
193         int[] weights;
194         int type = t.next();
195         switch (type) {
196         case '@':
197             return new Pick.Quote(item);
198         case '~':
199             return new Pick.Morph(item);
200         case '?':
201             int weight = getWeight();
202             if (weight == NO_WEIGHT) weight = 50;
203             weights = new int[] { 100 - weight, weight };
204             return Pick.repeat(0, 1, weights, item);
205         case '*':
206             weights = getWeights();
207             return Pick.repeat(1, maxRepeat, weights, item);
208         case '+':
209             weights = getWeights();
210             return Pick.repeat(1, maxRepeat, weights, item);
211         case '{':
212             if (t.next() != Tokenizer.NUMBER) error("missing number");
213             int start = (int) t.getNumber();
214             int end = start;
215             type = t.next();
216             if (type == ',') {
217                 end = maxRepeat;
218                 type = t.next();
219                 if (type == Tokenizer.NUMBER) {
220                     end = (int) t.getNumber();
221                     type = t.next();
222                 }
223             }
224             if (type != '}') error("missing }");
225             weights = getWeights();
226             return Pick.repeat(start, end, weights, item);
227         }
228         t.backup();
229         return item;
230     }
231 
getCore()232     Pick getCore() {
233         int token = t.next();
234         if (token == Tokenizer.STRING) {
235             String s = t.getString();
236             if (s.charAt(0) == '$') variables.add(s);
237             return Pick.string(s);
238         }
239         if (token == Tokenizer.UNICODESET) {
240             return Pick.codePoint(t.getUnicodeSet());
241         }
242         if (token != '(') {
243             t.backup();
244             return null;
245         }
246         Pick temp = getAlternation();
247         token = t.next();
248         if (token != ')') error("missing )");
249         return temp;
250     }
251 
getSequence()252     Pick getSequence() {
253         Pick.Sequence result = null;
254         Pick last = null;
255         while (true) {
256             Pick item = getCore();
257             if (item == null) {
258                 if (result != null) return result;
259                 if (last != null) return last;
260                 error("missing item in sequence");
261             }
262             // qualify it as many times as possible
263             Pick oldItem;
264             do {
265                 oldItem = item;
266                 item = qualify(item);
267             } while (item != oldItem);
268             // add it in
269             if (last == null) {
270                 last = item;
271             } else {
272                 if (result == null) result = Pick.makeSequence().and2(last);
273                 result = result.and2(item);
274             }
275         }
276     }
277 
278     // for simplicity, we just use recursive descent
getAlternation()279     Pick getAlternation() {
280         Pick.Alternation result = null;
281         Pick last = null;
282         int lastWeight = NO_WEIGHT;
283         while (true) {
284             Pick temp = getSequence();
285             if (temp == null) error("empty alternation");
286             int weight = getWeight();
287             if (weight == NO_WEIGHT) weight = 1;
288             if (last == null) {
289                 last = temp;
290                 lastWeight = weight;
291             } else {
292                 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
293                 result = result.or2(weight, temp);
294             }
295             int token = t.next();
296             if (token != '|') {
297                 t.backup();
298                 if (result != null) return result;
299                 if (last != null) return last;
300             }
301         }
302     }
303 
304     private static final int NO_WEIGHT = Integer.MIN_VALUE;
305 
getWeight()306     int getWeight() {
307         int weight;
308         int token = t.next();
309         if (token != Tokenizer.NUMBER) {
310             t.backup();
311             return NO_WEIGHT;
312         }
313         weight = (int) t.getNumber();
314         token = t.next();
315         if (token != '%') error("missing %");
316         return weight;
317     }
318 
getWeights()319     int[] getWeights() {
320         ArrayList list = new ArrayList();
321         while (true) {
322             int weight = getWeight();
323             if (weight == NO_WEIGHT) break;
324             list.add(new Integer(weight));
325         }
326         if (list.size() == 0) return null;
327         int[] result = new int[list.size()];
328         for (int i = 0; i < list.size(); ++i) {
329             result[i] = ((Integer) list.get(i)).intValue();
330         }
331         return result;
332     }
333 
getMaxRepeat()334     public int getMaxRepeat() {
335         return maxRepeat;
336     }
337 
setMaxRepeat(int maxRepeat)338     public BNF setMaxRepeat(int maxRepeat) {
339         this.maxRepeat = maxRepeat;
340         return this;
341     }
342 }
343