• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.unicode.cldr.test;
2 
3 import com.ibm.icu.text.MessageFormat;
4 import java.io.BufferedReader;
5 import java.io.IOException;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.regex.Matcher;
9 import java.util.regex.Pattern;
10 import org.unicode.cldr.util.CldrUtility;
11 import org.unicode.cldr.util.PatternCache;
12 import org.unicode.cldr.util.SimpleHtmlParser;
13 import org.unicode.cldr.util.SimpleHtmlParser.Type;
14 import org.unicode.cldr.util.TransliteratorUtilities;
15 
16 /** Private class to get the messages from a help file. */
17 public class HelpMessages {
18     private static final Matcher CLEANUP_BOOKMARK = PatternCache.get("[^a-zA-Z0-9]").matcher("");
19 
20     private static final MessageFormat DEFAULT_HEADER_PATTERN =
21             new MessageFormat("<p>{0}</p>" + CldrUtility.LINE_SEPARATOR);
22 
23     private static final Matcher HEADER_HTML =
24             PatternCache.get("<h[0-9]>(.*)</h[0-9]>").matcher("");
25 
26     List<Matcher> keys = new ArrayList<>();
27 
28     List<String> values = new ArrayList<>();
29 
30     enum Status {
31         BASE,
32         BEFORE_CELL,
33         IN_CELL,
34         IN_INSIDE_TABLE
35     }
36 
37     StringBuilder[] currentColumn = new StringBuilder[2];
38 
39     int column = 0;
40 
41     private static HelpMessages helpMessages;
42 
43     /**
44      * Create a HelpMessages object from a filename. The file has to be in the format of a table of
45      * <keyRegex,htmlText> pairs, where the key is a keyRegex expression and htmlText is arbitrary
46      * HTML text. For example:
47      *
48      * <p>{@link
49      * http://unicode.org/cldr/data/tools/cldr-code/org/unicode/cldr/util/data/chart_messages.html}
50      * is used for chart messages, where the key is the name of the chart.
51      *
52      * <p>{@link
53      * http://unicode.org/cldr/data/tools/cldr-code/org/unicode/cldr/util/data/test_help_messages.html}
54      * is used for help messages in the survey tool, where the key is an xpath.
55      *
56      * @param filename
57      */
HelpMessages(String filename)58     public HelpMessages(String filename) {
59         currentColumn[0] = new StringBuilder();
60         currentColumn[1] = new StringBuilder();
61         BufferedReader in;
62         try {
63             in = CldrUtility.getUTF8Data(filename);
64             int tableCount = 0;
65 
66             boolean inContent = false;
67             // if the table level is 1 (we are in the main table), then we look for
68             // <td>...</td><td>...</td>. That
69             // means that we have column 1 and column 2.
70 
71             SimpleHtmlParser simple = new SimpleHtmlParser().setReader(in);
72             StringBuilder result = new StringBuilder();
73             boolean hadPop = false;
74             main:
75             while (true) {
76                 Type x = simple.next(result);
77                 switch (x) {
78                     case ELEMENT: // with /table we pop the count
79                         if (SimpleHtmlParser.equals("table", result)) {
80                             if (hadPop) {
81                                 --tableCount;
82                             } else {
83                                 ++tableCount;
84                             }
85                         } else if (tableCount == 1) {
86                             if (SimpleHtmlParser.equals("tr", result)) {
87                                 if (hadPop) {
88                                     addHelpMessages();
89                                 }
90                                 column = 0;
91                             } else if (SimpleHtmlParser.equals("td", result)) {
92                                 if (hadPop) {
93                                     inContent = false;
94                                     ++column;
95                                 } else {
96                                     inContent = true;
97                                     continue main; // skip adding
98                                 }
99                             }
100                         }
101                         break;
102                     case ELEMENT_POP:
103                         hadPop = true;
104                         break;
105                     case ELEMENT_END:
106                         hadPop = false;
107                         break;
108                     case DONE:
109                         break main;
110                 }
111                 if (inContent) {
112                     SimpleHtmlParser.writeResult(x, result, currentColumn[column]);
113                 }
114             }
115 
116             in.close();
117         } catch (IOException e) {
118             System.err.println("Can't initialize help text");
119         }
120     }
121 
122     /**
123      * Get message corresponding to a key out of the file set on this object. For many files, the
124      * key will be an xpath, but it doesn't have to be. Note that <i>all</i> of pairs of
125      * <keyRegex,htmlText> where the key matches keyRegex will be concatenated together in order to
126      * get the result.
127      *
128      * @param key
129      * @return
130      */
find(String key)131     public String find(String key) {
132         return find(key, DEFAULT_HEADER_PATTERN);
133     }
134 
135     /**
136      * Get message corresponding to a key out of the file set on this object. For many files, the
137      * key will be an xpath, but it doesn't have to be. Note that <i>all</i> of pairs of
138      * <keyRegex,htmlText> where the key matches keyRegex will be concatenated together in order to
139      * get the result.
140      *
141      * @param key
142      * @param addHeader true if you want a header formed by looking at all the hN elements.
143      * @return
144      */
find(String key, MessageFormat headerPattern)145     public String find(String key, MessageFormat headerPattern) {
146         StringBuilder header = new StringBuilder();
147         StringBuilder result = new StringBuilder();
148         int keyCount = 0;
149         for (int i = 0; i < keys.size(); ++i) {
150             if (keys.get(i).reset(key).matches()) {
151                 if (result.length() != 0) {
152                     result.append(CldrUtility.LINE_SEPARATOR);
153                 }
154                 String value = values.get(i);
155                 if (headerPattern != null) {
156                     HEADER_HTML.reset(value);
157                     int lastEnd = 0;
158                     StringBuilder newValue = new StringBuilder();
159                     while (HEADER_HTML.find()) {
160                         String contents = HEADER_HTML.group(1);
161                         if (contents.contains("<")) {
162                             continue; // disallow other formatting
163                         }
164                         String bookmark = "HM_" + CLEANUP_BOOKMARK.reset(contents).replaceAll("_");
165                         keyCount++;
166                         if (header.length() > 0) {
167                             header.append(" | ");
168                         }
169                         header.append("<a href='#")
170                                 .append(bookmark)
171                                 .append("'>")
172                                 .append(contents)
173                                 .append("</a>");
174                         newValue.append(value.substring(lastEnd, HEADER_HTML.start(1)));
175                         newValue.append("<a name='")
176                                 .append(bookmark)
177                                 .append("'>")
178                                 .append(contents)
179                                 .append("</a>");
180                         lastEnd = HEADER_HTML.end(1);
181                     }
182                     newValue.append(value.substring(lastEnd));
183                     value = newValue.toString();
184                 }
185                 result.append(value);
186             }
187         }
188         if (result.length() != 0) {
189             if (keyCount > 1) {
190                 result.insert(0, headerPattern.format(new Object[] {header.toString()}));
191             }
192             return result.toString();
193         }
194         return null;
195     }
196 
addHelpMessages()197     private void addHelpMessages() {
198         if (column == 2) { // must have two columns
199             try {
200                 // remove the first character and the last two characters, since the are >....</
201                 String key = currentColumn[0].substring(1, currentColumn[0].length() - 2).trim();
202                 String value = currentColumn[1].substring(1, currentColumn[1].length() - 2).trim();
203                 if (ExampleGenerator.DEBUG_SHOW_HELP) {
204                     System.out.println("{" + key + "} => {" + value + "}");
205                 }
206                 Matcher m =
207                         Pattern.compile(
208                                         TransliteratorUtilities.fromHTML.transliterate(key),
209                                         Pattern.COMMENTS)
210                                 .matcher("");
211                 keys.add(m);
212                 values.add(value);
213             } catch (RuntimeException e) {
214                 System.err.println("Help file has illegal regex: " + currentColumn[0]);
215             }
216         }
217         currentColumn[0].setLength(0);
218         currentColumn[1].setLength(0);
219         column = 0;
220     }
221 
getChartMessages(String xpath)222     public static String getChartMessages(String xpath) {
223         synchronized (HelpMessages.class) {
224             if (HelpMessages.helpMessages == null) {
225                 HelpMessages.helpMessages = new HelpMessages("chart_messages.html");
226             }
227         }
228         return HelpMessages.helpMessages.find(xpath);
229         // if (xpath.contains("/exemplarCharacters")) {
230         // result = "The standard exemplar characters are those used in customary writing ([a-z] for
231         // English; "
232         // + "the auxiliary characters are used in foreign words found in typical magazines,
233         // newspapers, &c.; "
234         // + "currency auxilliary characters are those used in currency symbols, like 'US$ 1,234'.
235         // ";
236         // }
237         // return result == null ? null : TransliteratorUtilities.toHTML.transliterate(result);
238     }
239 }
240