• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2002-2012, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package org.unicode.cldr.util;
8 
9 import com.ibm.icu.text.UnicodeSet;
10 import java.util.ArrayList;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.Map;
15 import java.util.Random;
16 import java.util.Set;
17 
18 public class BNF {
19     private Map map = new HashMap();
20     private Set variables = new HashSet();
21     private Pick pick = null;
22     private Pick.Target target = null;
23     private Tokenizer t;
24     private Quoter quoter;
25     private Random random;
26 
next()27     public String next() {
28         return target.next();
29     }
30 
getInternal()31     public String getInternal() {
32         return pick.getInternal(0, new HashSet());
33     }
34 
35     /*
36     + "weight = integer '%';"
37     + "range = '{' integer (',' integer?)? '}' weight*;"
38     + "quote = '@';"
39     + "star = '*' weight*;"
40     + "plus = '+' weight*;"
41     + "maybe = '?' weight?;"
42     + "quantifier = range | star | maybe | plus;"
43     + "core = string | unicodeSet | '(' alternation ')';"
44     + "sequence = (core quantifier*)+;"
45     + "alternation = sequence (weight? ('|' sequence weight?)+)?;"
46     + "rule = string '=' alternation;";
47 
48 
49     *      Match 0 or more times
50     +      Match 1 or more times
51     ?      Match 1 or 0 times
52     {n}    Match exactly n times
53     {n,}   Match at least n times
54     {n,m}  Match at least n but not more than m times
55 
56 
57 
58     */
59 
BNF(Random random, Quoter quoter)60     public BNF(Random random, Quoter quoter) {
61         this.random = random;
62         this.quoter = quoter;
63         t = new Tokenizer();
64     }
65 
addRules(String rules)66     public BNF addRules(String rules) {
67         t.setSource(rules);
68         while (addRule()) {}
69         return this; // for chaining
70     }
71 
complete()72     public BNF complete() {
73         // check that the rules match the variables, except for $root in rules
74         Set ruleSet = map.keySet();
75         // add also
76         variables.add("$root");
77         variables.addAll(t.getLookedUpItems());
78         if (!ruleSet.equals(variables)) {
79             String msg = showDiff(variables, ruleSet);
80             if (msg.length() != 0) msg = "Error: Missing definitions for: " + msg;
81             String temp = showDiff(ruleSet, variables);
82             if (temp.length() != 0) temp = "Warning: Defined but not used: " + temp;
83             if (msg.length() == 0) msg = temp;
84             else if (temp.length() != 0) {
85                 msg = msg + "; " + temp;
86             }
87             error(msg);
88         }
89 
90         if (!ruleSet.equals(variables)) {
91             String msg = showDiff(variables, ruleSet);
92             if (msg.length() != 0) msg = "Missing definitions for: " + msg;
93             String temp = showDiff(ruleSet, variables);
94             if (temp.length() != 0) temp = "Defined but not used: " + temp;
95             if (msg.length() == 0) msg = temp;
96             else if (temp.length() != 0) {
97                 msg = msg + "; " + temp;
98             }
99             error(msg);
100         }
101 
102         // replace variables by definitions
103         Iterator it = ruleSet.iterator();
104         while (it.hasNext()) {
105             String key = (String) it.next();
106             Pick expression = (Pick) map.get(key);
107             Iterator it2 = ruleSet.iterator();
108             if (false && key.equals("$crlf")) {
109                 System.out.println("debug");
110             }
111             while (it2.hasNext()) {
112                 Object key2 = it2.next();
113                 if (key.equals(key2)) continue;
114                 Pick expression2 = (Pick) map.get(key2);
115                 expression2.replace(key, expression);
116             }
117         }
118         pick = (Pick) map.get("$root");
119         target = Pick.Target.make(pick, random, quoter);
120         // TODO remove temp collections
121         return this;
122     }
123 
showDiff(Set a, Set b)124     String showDiff(Set a, Set b) {
125         Set temp = new HashSet();
126         temp.addAll(a);
127         temp.removeAll(b);
128         if (temp.size() == 0) return "";
129         StringBuffer buffer = new StringBuffer();
130         Iterator it = temp.iterator();
131         while (it.hasNext()) {
132             if (buffer.length() != 0) buffer.append(", ");
133             buffer.append(it.next().toString());
134         }
135         return buffer.toString();
136     }
137 
error(String msg)138     void error(String msg) {
139         throw new IllegalArgumentException(msg + "\r\n" + t.toString());
140     }
141 
addRule()142     private boolean addRule() {
143         int type = t.next();
144         // System.out.println(type + "t.getString " + t.getString());
145         if (type == Tokenizer.DONE) {
146             return false;
147         }
148         if (type != Tokenizer.STRING) {
149             error("missing weight");
150         }
151         String s = t.getString();
152         if (s.length() == 0 || s.charAt(0) != '$') {
153             error("missing $ in variable");
154         }
155         if (t.next() != '=') {
156             error("missing =");
157         }
158         int startBody = t.index;
159         Pick rule = getAlternation();
160         if (rule == null) error("missing expression");
161         t.addSymbol(s, t.getSource(), startBody, t.index);
162         if (t.next() != ';') {
163             error("missing ;");
164         }
165         return addPick(s, rule);
166     }
167 
addPick(String s, Pick rule)168     protected boolean addPick(String s, Pick rule) {
169         Object temp = map.get(s);
170         if (temp != null) error("duplicate variable");
171         if (rule.name == null) rule.name(s);
172         map.put(s, rule);
173         return true;
174     }
175 
addSet(String variable, UnicodeSet set)176     public BNF addSet(String variable, UnicodeSet set) {
177         if (set != null) {
178             String body = set.toString();
179             t.addSymbol(variable, body, 0, body.length());
180             addPick(variable, Pick.codePoint(set));
181         }
182         return this;
183     }
184 
185     int maxRepeat = 99;
186 
qualify(Pick item)187     Pick qualify(Pick item) {
188         int[] weights;
189         int type = t.next();
190         switch (type) {
191             case '@':
192                 return new Pick.Quote(item);
193             case '~':
194                 return new Pick.Morph(item);
195             case '?':
196                 int weight = getWeight();
197                 if (weight == NO_WEIGHT) weight = 50;
198                 weights = new int[] {100 - weight, weight};
199                 return Pick.repeat(0, 1, weights, item);
200             case '*':
201                 weights = getWeights();
202                 return Pick.repeat(1, maxRepeat, weights, item);
203             case '+':
204                 weights = getWeights();
205                 return Pick.repeat(1, maxRepeat, weights, item);
206             case '{':
207                 if (t.next() != Tokenizer.NUMBER) error("missing number");
208                 int start = (int) t.getNumber();
209                 int end = start;
210                 type = t.next();
211                 if (type == ',') {
212                     end = maxRepeat;
213                     type = t.next();
214                     if (type == Tokenizer.NUMBER) {
215                         end = (int) t.getNumber();
216                         type = t.next();
217                     }
218                 }
219                 if (type != '}') error("missing }");
220                 weights = getWeights();
221                 return Pick.repeat(start, end, weights, item);
222         }
223         t.backup();
224         return item;
225     }
226 
getCore()227     Pick getCore() {
228         int token = t.next();
229         if (token == Tokenizer.STRING) {
230             String s = t.getString();
231             if (s.charAt(0) == '$') variables.add(s);
232             return Pick.string(s);
233         }
234         if (token == Tokenizer.UNICODESET) {
235             return Pick.codePoint(t.getUnicodeSet());
236         }
237         if (token != '(') {
238             t.backup();
239             return null;
240         }
241         Pick temp = getAlternation();
242         token = t.next();
243         if (token != ')') error("missing )");
244         return temp;
245     }
246 
getSequence()247     Pick getSequence() {
248         Pick.Sequence result = null;
249         Pick last = null;
250         while (true) {
251             Pick item = getCore();
252             if (item == null) {
253                 if (result != null) return result;
254                 if (last != null) return last;
255                 error("missing item in sequence");
256             }
257             // qualify it as many times as possible
258             Pick oldItem;
259             do {
260                 oldItem = item;
261                 item = qualify(item);
262             } while (item != oldItem);
263             // add it in
264             if (last == null) {
265                 last = item;
266             } else {
267                 if (result == null) result = Pick.makeSequence().and2(last);
268                 result = result.and2(item);
269             }
270         }
271     }
272 
273     // for simplicity, we just use recursive descent
getAlternation()274     Pick getAlternation() {
275         Pick.Alternation result = null;
276         Pick last = null;
277         int lastWeight = NO_WEIGHT;
278         while (true) {
279             Pick temp = getSequence();
280             if (temp == null) error("empty alternation");
281             int weight = getWeight();
282             if (weight == NO_WEIGHT) weight = 1;
283             if (last == null) {
284                 last = temp;
285                 lastWeight = weight;
286             } else {
287                 if (result == null) result = Pick.makeAlternation().or2(lastWeight, last);
288                 result = result.or2(weight, temp);
289             }
290             int token = t.next();
291             if (token != '|') {
292                 t.backup();
293                 if (result != null) return result;
294                 if (last != null) return last;
295             }
296         }
297     }
298 
299     private static final int NO_WEIGHT = Integer.MIN_VALUE;
300 
getWeight()301     int getWeight() {
302         int weight;
303         int token = t.next();
304         if (token != Tokenizer.NUMBER) {
305             t.backup();
306             return NO_WEIGHT;
307         }
308         weight = (int) t.getNumber();
309         token = t.next();
310         if (token != '%') error("missing %");
311         return weight;
312     }
313 
getWeights()314     int[] getWeights() {
315         ArrayList<Integer> list = new ArrayList();
316         while (true) {
317             int weight = getWeight();
318             if (weight == NO_WEIGHT) break;
319             list.add(weight);
320         }
321         if (list.isEmpty()) return null;
322         int[] result = new int[list.size()];
323         for (int i = 0; i < list.size(); ++i) {
324             result[i] = list.get(i).intValue();
325         }
326         return result;
327     }
328 
getMaxRepeat()329     public int getMaxRepeat() {
330         return maxRepeat;
331     }
332 
setMaxRepeat(int maxRepeat)333     public BNF setMaxRepeat(int maxRepeat) {
334         this.maxRepeat = maxRepeat;
335         return this;
336     }
337 }
338