• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id$
 */
21 
22 package org.apache.qetest;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
27 
28 
/**
 * Simple utility for writing XML documents from character tables.
 *
 * <p>A character table is an {@code Object[][]} in which every row holds an
 * {@link Integer} character code followed by a description of that character.
 * The generated document has the shape:</p>
 * <pre>
 * &lt;chartables fileencoding="..."&gt;
 *   &lt;chars enc="..." includeUnencoded="..."&gt;
 *     &lt;char dec="38" desc="AMPERSAND"&gt;&lt;c&gt;&amp;amp;&lt;/c&gt;&lt;e&gt;&amp;amp;#x26;&lt;/e&gt;&lt;/char&gt;
 *     ...
 *   &lt;/chars&gt;
 * &lt;/chartables&gt;
 * </pre>
 *
 * @author scott_boag@lotus.com
 * @author shane_curcuru@lotus.com
 * @version $Id$
 */
public class CharTables
{

    /** First character code (inclusive) scanned by {@link #writeCharTable}.  */
    private static final int FIRST_CODE = 0x20;

    /**
     * Last character code (inclusive) scanned by {@link #writeCharTable}.
     * Table rows whose code lies above this bound are never written; in the
     * sample {@link #greek} table this applies to U+2015 HORIZONTAL BAR.
     */
    private static final int LAST_CODE = 0x03CE + 4 /* 0xD7FF */;

    /**
     * Write a chars table to a file.
     *
     * Simply uses new OutputStreamWriter(..., fileencoding).  The file is
     * always closed before this method returns, whether or not writing
     * succeeded.
     *
     * @param chars array of Objects, Integer char code and
     * String description thereof (only including applicable codes)
     * @param includeUnencoded true to also write a "not encoded" entry for
     * every scanned code missing from chars; false to skip those codes
     * @param xmlencoding the XML name used in encoding= attr
     * @param fileencoding the encoding to output to
     * @param filename to write to
     * @throws Exception any underlying exceptions, including an
     * IOException when the writer reports that output could not be written
     */
    public static void writeCharTableFile(Object[][] chars, boolean includeUnencoded,
            String xmlencoding, String fileencoding, String filename)
            throws Exception
    {
        FileOutputStream fos = new FileOutputStream(new File(filename));
        PrintWriter writer = null;

        try
        {
            // May throw UnsupportedEncodingException; fos is released below
            writer = new PrintWriter(new OutputStreamWriter(fos, fileencoding));

            writer.println("<?xml version=\"1.0\" encoding=\"" + xmlencoding + "\"?>");
            writer.println("<chartables fileencoding=\"" + escapeAttribute(fileencoding) + "\">");
            CharTables.writeCharTable(chars, includeUnencoded, xmlencoding, writer);
            writer.println("</chartables>");
        }
        finally
        {
            if (writer != null)
            {
                // Closing the writer also closes the underlying stream
                writer.close();
            }
            else
            {
                try
                {
                    fos.close();
                }
                catch (IOException ignored)
                {
                    // Keep the original (encoding) exception as the one reported
                }
            }
        }

        // PrintWriter never throws on I/O failure; it only records it
        if (writer.checkError())
        {
            throw new IOException("Error writing char table to " + filename);
        }
    }

    /**
     * Write a chars table to a stream.
     *
     * Scans the character codes 0x20 through 0x03CE+4 in ascending order and,
     * for each code found in chars, writes a char element holding the
     * character itself and its hexadecimal character reference.  When a code
     * appears in more than one row, the first row wins.  The writer is
     * flushed but not closed.
     *
     * @param chars array of Objects, Integer char code and
     * String description thereof (only including applicable codes)
     * @param includeUnencoded true to also write a "not encoded" entry for
     * every scanned code missing from chars; false to skip those codes
     * @param encoding the encoding name reported in the enc attribute
     * @param writer where to write to
     * @throws Exception any underlying exceptions
     */
    public static void writeCharTable(Object[][] chars, boolean includeUnencoded,
            String encoding, PrintWriter writer)
            throws Exception
    {
        writer.println(CHARS_HEADER + escapeAttribute(encoding)
                + "\" includeUnencoded=\"" + includeUnencoded + "\">");

        for (int code = FIRST_CODE; code <= LAST_CODE; code++)
        {
            final int row = indexOfCode(chars, code);

            if (row >= 0)
            {
                writer.print(CHAR_HEADER + code + CHAR_HEADER2
                        + escapeAttribute(chars[row][1]) + "\">");
                writer.print(C_HEADER);
                switch (code)
                {
                case '&':
                    writer.print("&amp;");
                    break;
                case '<':
                    writer.print("&lt;");
                    break;
                default:
                    writer.print((char) code);
                }
                writer.print(C_ENDER);
                writeEntityElement(writer, code);
                writer.println(CHAR_ENDER);
            }
            else if (includeUnencoded)
            {
                // This character is not provided in the specified encoding,
                //  so don't bother writing out the ELEM_C
                writer.print(CHAR_HEADER + code + CHAR_HEADER2 + "not encoded" + "\">");
                writeEntityElement(writer, code);
                writer.println(CHAR_ENDER);
            }
        }

        writer.println(CHARS_ENDER);
        writer.flush();
    }

    /**
     * Find the first row of the table describing the given character code.
     *
     * @param chars table to search; column 0 of every row must be an Integer
     * @param code character code to look for
     * @return index of the first matching row, or -1 when there is none
     */
    private static int indexOfCode(Object[][] chars, int code)
    {
        for (int i = 0; i < chars.length; i++)
        {
            if (((Integer) chars[i][0]).intValue() == code)
            {
                return i;
            }
        }
        return -1;
    }

    /**
     * Write the e element: a hexadecimal character reference to the code.
     *
     * @param writer where to write to
     * @param code character code to reference
     */
    private static void writeEntityElement(PrintWriter writer, int code)
    {
        writer.print(E_HEADER);
        writer.print("&#x");
        writer.print(Integer.toHexString(code));
        writer.print(";");
        writer.print(E_ENDER);
    }

    /**
     * Escape a value for use inside a double-quoted XML attribute.
     *
     * @param value value to escape; null is rendered as the text "null",
     * matching plain string concatenation
     * @return the text with ampersand, less-than and double-quote characters
     * replaced by their predefined entity references
     */
    private static String escapeAttribute(Object value)
    {
        final String text = String.valueOf(value);
        StringBuilder escaped = null;

        for (int i = 0; i < text.length(); i++)
        {
            final char c = text.charAt(i);
            final String replacement;
            switch (c)
            {
            case '&':
                replacement = "&amp;";
                break;
            case '<':
                replacement = "&lt;";
                break;
            case '"':
                replacement = "&quot;";
                break;
            default:
                replacement = null;
            }

            if (replacement != null)
            {
                if (escaped == null)
                {
                    // First special character: copy everything seen so far
                    escaped = new StringBuilder(text.length() + 16);
                    escaped.append(text, 0, i);
                }
                escaped.append(replacement);
            }
            else if (escaped != null)
            {
                escaped.append(c);
            }
        }
        return (escaped == null) ? text : escaped.toString();
    }


    /** chars elem - the whole table.  */
    public static final String ELEM_CHARS = "chars";

    /** chars elem, enc attr - encoding of these chars.  */
    public static final String ATTR_ENC = "enc";

    /** Convenience precalculated string: chars start tag up to the opening quote of enc.  */
    public static String CHARS_HEADER = "<" + ELEM_CHARS + " " + ATTR_ENC + "=\"";

    /** Convenience precalculated string: chars end tag.  */
    public static String CHARS_ENDER = "</" + ELEM_CHARS + ">";

    /** char elem - a single character.  */
    public static final String ELEM_CHAR = "char";

    /** char elem, dec attr - decimal char code.  */
    public static final String ATTR_DEC = "dec";

    /** char elem, desc attr - description.  */
    public static final String ATTR_DESC = "desc";

    /** Convenience precalculated string: char start tag up to the opening quote of dec.  */
    public static String CHAR_HEADER = "<" + ELEM_CHAR + " " + ATTR_DEC + "=\"";

    /** Convenience precalculated string: closes dec and opens the desc attribute.  */
    public static String CHAR_HEADER2 = "\" " + ATTR_DESC + "=\"";

    /** Convenience precalculated string: char end tag.  */
    public static String CHAR_ENDER = "</" + ELEM_CHAR + ">";


    /** c elem - just the character in the encoding.  */
    public static final String ELEM_C = "c";

    /** Convenience precalculated string: c start tag.  */
    public static String C_HEADER = "<" + ELEM_C + ">";

    /** Convenience precalculated string: c end tag.  */
    public static String C_ENDER = "</" + ELEM_C + ">";


    /** e elem - the entity reference to the character.  */
    public static final String ELEM_E = "e";

    /** Convenience precalculated string: e start tag.  */
    public static String E_HEADER = "<" + ELEM_E + ">";

    /** Convenience precalculated string: e end tag.  */
    public static String E_ENDER = "</" + ELEM_E + ">";


    /**
     * Main method to run from the command line; sample usage.
     *
     * Writes the sample greek table to the file named by the first argument,
     * or to chartable.xml when no argument is given.
     *
     * @param args cmd line arguments; args[0] is the optional output filename
     */
    public static void main(String[] args)
    {
        String filename = "chartable.xml";
        if (args.length >= 1)
        {
            filename = args[0];
        }
        String xmlencoding = "ISO-8859-7";
        String fileencoding = "ISO8859_7";
        try
        {
            // Sample usage with greek table, below
            CharTables.writeCharTableFile(greek, false, xmlencoding, fileencoding, filename);
            System.out.println("Wrote " + filename + " output in encodings " + xmlencoding + "/" + fileencoding);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }


    /** Sample data: greek/ISO-8859-7/ISO8859_7 .  */
    public static final Object greek[][] =
    {
        {Integer.valueOf(0x0020), "SPACE"},
        {Integer.valueOf(0x0021), "EXCLAMATION MARK"},
        {Integer.valueOf(0x0022), "QUOTATION MARK"},
        {Integer.valueOf(0x0023), "NUMBER SIGN"},
        {Integer.valueOf(0x0024), "DOLLAR SIGN"},
        {Integer.valueOf(0x0025), "PERCENT SIGN"},
        {Integer.valueOf(0x0026), "AMPERSAND"},
        {Integer.valueOf(0x0027), "APOSTROPHE"},
        {Integer.valueOf(0x0028), "LEFT PARENTHESIS"},
        {Integer.valueOf(0x0029), "RIGHT PARENTHESIS"},
        {Integer.valueOf(0x002A), "ASTERISK"},
        {Integer.valueOf(0x002B), "PLUS SIGN"},
        {Integer.valueOf(0x002C), "COMMA"},
        {Integer.valueOf(0x002D), "HYPHEN-MINUS"},
        {Integer.valueOf(0x002E), "FULL STOP"},
        {Integer.valueOf(0x002F), "SOLIDUS"},
        {Integer.valueOf(0x0030), "DIGIT ZERO"},
        {Integer.valueOf(0x0031), "DIGIT ONE"},
        {Integer.valueOf(0x0032), "DIGIT TWO"},
        {Integer.valueOf(0x0033), "DIGIT THREE"},
        {Integer.valueOf(0x0034), "DIGIT FOUR"},
        {Integer.valueOf(0x0035), "DIGIT FIVE"},
        {Integer.valueOf(0x0036), "DIGIT SIX"},
        {Integer.valueOf(0x0037), "DIGIT SEVEN"},
        {Integer.valueOf(0x0038), "DIGIT EIGHT"},
        {Integer.valueOf(0x0039), "DIGIT NINE"},
        {Integer.valueOf(0x003A), "COLON"},
        {Integer.valueOf(0x003B), "SEMICOLON"},
        {Integer.valueOf(0x003C), "LESS-THAN SIGN"},
        {Integer.valueOf(0x003D), "EQUALS SIGN"},
        {Integer.valueOf(0x003E), "GREATER-THAN SIGN"},
        {Integer.valueOf(0x003F), "QUESTION MARK"},
        {Integer.valueOf(0x0040), "COMMERCIAL AT"},
        {Integer.valueOf(0x0041), "LATIN CAPITAL LETTER A"},
        {Integer.valueOf(0x0042), "LATIN CAPITAL LETTER B"},
        {Integer.valueOf(0x0043), "LATIN CAPITAL LETTER C"},
        {Integer.valueOf(0x0044), "LATIN CAPITAL LETTER D"},
        {Integer.valueOf(0x0045), "LATIN CAPITAL LETTER E"},
        {Integer.valueOf(0x0046), "LATIN CAPITAL LETTER F"},
        {Integer.valueOf(0x0047), "LATIN CAPITAL LETTER G"},
        {Integer.valueOf(0x0048), "LATIN CAPITAL LETTER H"},
        {Integer.valueOf(0x0049), "LATIN CAPITAL LETTER I"},
        {Integer.valueOf(0x004A), "LATIN CAPITAL LETTER J"},
        {Integer.valueOf(0x004B), "LATIN CAPITAL LETTER K"},
        {Integer.valueOf(0x004C), "LATIN CAPITAL LETTER L"},
        {Integer.valueOf(0x004D), "LATIN CAPITAL LETTER M"},
        {Integer.valueOf(0x004E), "LATIN CAPITAL LETTER N"},
        {Integer.valueOf(0x004F), "LATIN CAPITAL LETTER O"},
        {Integer.valueOf(0x0050), "LATIN CAPITAL LETTER P"},
        {Integer.valueOf(0x0051), "LATIN CAPITAL LETTER Q"},
        {Integer.valueOf(0x0052), "LATIN CAPITAL LETTER R"},
        {Integer.valueOf(0x0053), "LATIN CAPITAL LETTER S"},
        {Integer.valueOf(0x0054), "LATIN CAPITAL LETTER T"},
        {Integer.valueOf(0x0055), "LATIN CAPITAL LETTER U"},
        {Integer.valueOf(0x0056), "LATIN CAPITAL LETTER V"},
        {Integer.valueOf(0x0057), "LATIN CAPITAL LETTER W"},
        {Integer.valueOf(0x0058), "LATIN CAPITAL LETTER X"},
        {Integer.valueOf(0x0059), "LATIN CAPITAL LETTER Y"},
        {Integer.valueOf(0x005A), "LATIN CAPITAL LETTER Z"},
        {Integer.valueOf(0x005B), "LEFT SQUARE BRACKET"},
        {Integer.valueOf(0x005C), "REVERSE SOLIDUS"},
        {Integer.valueOf(0x005D), "RIGHT SQUARE BRACKET"},
        {Integer.valueOf(0x005E), "CIRCUMFLEX ACCENT"},
        {Integer.valueOf(0x005F), "LOW LINE"},
        {Integer.valueOf(0x0060), "GRAVE ACCENT"},
        {Integer.valueOf(0x0061), "LATIN SMALL LETTER A"},
        {Integer.valueOf(0x0062), "LATIN SMALL LETTER B"},
        {Integer.valueOf(0x0063), "LATIN SMALL LETTER C"},
        {Integer.valueOf(0x0064), "LATIN SMALL LETTER D"},
        {Integer.valueOf(0x0065), "LATIN SMALL LETTER E"},
        {Integer.valueOf(0x0066), "LATIN SMALL LETTER F"},
        {Integer.valueOf(0x0067), "LATIN SMALL LETTER G"},
        {Integer.valueOf(0x0068), "LATIN SMALL LETTER H"},
        {Integer.valueOf(0x0069), "LATIN SMALL LETTER I"},
        {Integer.valueOf(0x006A), "LATIN SMALL LETTER J"},
        {Integer.valueOf(0x006B), "LATIN SMALL LETTER K"},
        {Integer.valueOf(0x006C), "LATIN SMALL LETTER L"},
        {Integer.valueOf(0x006D), "LATIN SMALL LETTER M"},
        {Integer.valueOf(0x006E), "LATIN SMALL LETTER N"},
        {Integer.valueOf(0x006F), "LATIN SMALL LETTER O"},
        {Integer.valueOf(0x0070), "LATIN SMALL LETTER P"},
        {Integer.valueOf(0x0071), "LATIN SMALL LETTER Q"},
        {Integer.valueOf(0x0072), "LATIN SMALL LETTER R"},
        {Integer.valueOf(0x0073), "LATIN SMALL LETTER S"},
        {Integer.valueOf(0x0074), "LATIN SMALL LETTER T"},
        {Integer.valueOf(0x0075), "LATIN SMALL LETTER U"},
        {Integer.valueOf(0x0076), "LATIN SMALL LETTER V"},
        {Integer.valueOf(0x0077), "LATIN SMALL LETTER W"},
        {Integer.valueOf(0x0078), "LATIN SMALL LETTER X"},
        {Integer.valueOf(0x0079), "LATIN SMALL LETTER Y"},
        {Integer.valueOf(0x007A), "LATIN SMALL LETTER Z"},
        {Integer.valueOf(0x007B), "LEFT CURLY BRACKET"},
        {Integer.valueOf(0x007C), "VERTICAL LINE"},
        {Integer.valueOf(0x007D), "RIGHT CURLY BRACKET"},
        {Integer.valueOf(0x007E), "TILDE"},
        {Integer.valueOf(0x00A0), "NO-BREAK SPACE"},
        {Integer.valueOf(0x02BD), "MODIFIER LETTER REVERSED COMMA"},
        {Integer.valueOf(0x02BC), "MODIFIER LETTER APOSTROPHE"},
        {Integer.valueOf(0x00A3), "POUND SIGN"},
        {Integer.valueOf(0x00A6), "BROKEN BAR"},
        {Integer.valueOf(0x00A7), "SECTION SIGN"},
        {Integer.valueOf(0x00A8), "DIAERESIS"},
        {Integer.valueOf(0x00A9), "COPYRIGHT SIGN"},
        {Integer.valueOf(0x00AB), "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"},
        {Integer.valueOf(0x00AC), "NOT SIGN"},
        {Integer.valueOf(0x00AD), "SOFT HYPHEN"},
        {Integer.valueOf(0x2015), "HORIZONTAL BAR"},
        {Integer.valueOf(0x00B0), "DEGREE SIGN"},
        {Integer.valueOf(0x00B1), "PLUS-MINUS SIGN"},
        {Integer.valueOf(0x00B2), "SUPERSCRIPT TWO"},
        {Integer.valueOf(0x00B3), "SUPERSCRIPT THREE"},
        {Integer.valueOf(0x0384), "GREEK TONOS"},
        {Integer.valueOf(0x0385), "GREEK DIALYTIKA TONOS"},
        {Integer.valueOf(0x0386), "GREEK CAPITAL LETTER ALPHA WITH TONOS"},
        {Integer.valueOf(0x00B7), "MIDDLE DOT"},
        {Integer.valueOf(0x0388), "GREEK CAPITAL LETTER EPSILON WITH TONOS"},
        {Integer.valueOf(0x0389), "GREEK CAPITAL LETTER ETA WITH TONOS"},
        {Integer.valueOf(0x038A), "GREEK CAPITAL LETTER IOTA WITH TONOS"},
        {Integer.valueOf(0x00BB), "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"},
        {Integer.valueOf(0x038C), "GREEK CAPITAL LETTER OMICRON WITH TONOS"},
        {Integer.valueOf(0x00BD), "VULGAR FRACTION ONE HALF"},
        {Integer.valueOf(0x038E), "GREEK CAPITAL LETTER UPSILON WITH TONOS"},
        {Integer.valueOf(0x038F), "GREEK CAPITAL LETTER OMEGA WITH TONOS"},
        {Integer.valueOf(0x0390), "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"},
        {Integer.valueOf(0x0391), "GREEK CAPITAL LETTER ALPHA"},
        {Integer.valueOf(0x0392), "GREEK CAPITAL LETTER BETA"},
        {Integer.valueOf(0x0393), "GREEK CAPITAL LETTER GAMMA"},
        {Integer.valueOf(0x0394), "GREEK CAPITAL LETTER DELTA"},
        {Integer.valueOf(0x0395), "GREEK CAPITAL LETTER EPSILON"},
        {Integer.valueOf(0x0396), "GREEK CAPITAL LETTER ZETA"},
        {Integer.valueOf(0x0397), "GREEK CAPITAL LETTER ETA"},
        {Integer.valueOf(0x0398), "GREEK CAPITAL LETTER THETA"},
        {Integer.valueOf(0x0399), "GREEK CAPITAL LETTER IOTA"},
        {Integer.valueOf(0x039A), "GREEK CAPITAL LETTER KAPPA"},
        {Integer.valueOf(0x039B), "GREEK CAPITAL LETTER LAMDA"},
        {Integer.valueOf(0x039C), "GREEK CAPITAL LETTER MU"},
        {Integer.valueOf(0x039D), "GREEK CAPITAL LETTER NU"},
        {Integer.valueOf(0x039E), "GREEK CAPITAL LETTER XI"},
        {Integer.valueOf(0x039F), "GREEK CAPITAL LETTER OMICRON"},
        {Integer.valueOf(0x03A0), "GREEK CAPITAL LETTER PI"},
        {Integer.valueOf(0x03A1), "GREEK CAPITAL LETTER RHO"},
        {Integer.valueOf(0x03A3), "GREEK CAPITAL LETTER SIGMA"},
        {Integer.valueOf(0x03A4), "GREEK CAPITAL LETTER TAU"},
        {Integer.valueOf(0x03A5), "GREEK CAPITAL LETTER UPSILON"},
        {Integer.valueOf(0x03A6), "GREEK CAPITAL LETTER PHI"},
        {Integer.valueOf(0x03A7), "GREEK CAPITAL LETTER CHI"},
        {Integer.valueOf(0x03A8), "GREEK CAPITAL LETTER PSI"},
        {Integer.valueOf(0x03A9), "GREEK CAPITAL LETTER OMEGA"},
        {Integer.valueOf(0x03AA), "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"},
        {Integer.valueOf(0x03AB), "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"},
        {Integer.valueOf(0x03AC), "GREEK SMALL LETTER ALPHA WITH TONOS"},
        {Integer.valueOf(0x03AD), "GREEK SMALL LETTER EPSILON WITH TONOS"},
        {Integer.valueOf(0x03AE), "GREEK SMALL LETTER ETA WITH TONOS"},
        {Integer.valueOf(0x03AF), "GREEK SMALL LETTER IOTA WITH TONOS"},
        {Integer.valueOf(0x03B0), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"},
        {Integer.valueOf(0x03B1), "GREEK SMALL LETTER ALPHA"},
        {Integer.valueOf(0x03B2), "GREEK SMALL LETTER BETA"},
        {Integer.valueOf(0x03B3), "GREEK SMALL LETTER GAMMA"},
        {Integer.valueOf(0x03B4), "GREEK SMALL LETTER DELTA"},
        {Integer.valueOf(0x03B5), "GREEK SMALL LETTER EPSILON"},
        {Integer.valueOf(0x03B6), "GREEK SMALL LETTER ZETA"},
        {Integer.valueOf(0x03B7), "GREEK SMALL LETTER ETA"},
        {Integer.valueOf(0x03B8), "GREEK SMALL LETTER THETA"},
        {Integer.valueOf(0x03B9), "GREEK SMALL LETTER IOTA"},
        {Integer.valueOf(0x03BA), "GREEK SMALL LETTER KAPPA"},
        {Integer.valueOf(0x03BB), "GREEK SMALL LETTER LAMDA"},
        {Integer.valueOf(0x03BC), "GREEK SMALL LETTER MU"},
        {Integer.valueOf(0x03BD), "GREEK SMALL LETTER NU"},
        {Integer.valueOf(0x03BE), "GREEK SMALL LETTER XI"},
        {Integer.valueOf(0x03BF), "GREEK SMALL LETTER OMICRON"},
        {Integer.valueOf(0x03C0), "GREEK SMALL LETTER PI"},
        {Integer.valueOf(0x03C1), "GREEK SMALL LETTER RHO"},
        {Integer.valueOf(0x03C2), "GREEK SMALL LETTER FINAL SIGMA"},
        {Integer.valueOf(0x03C3), "GREEK SMALL LETTER SIGMA"},
        {Integer.valueOf(0x03C4), "GREEK SMALL LETTER TAU"},
        {Integer.valueOf(0x03C5), "GREEK SMALL LETTER UPSILON"},
        {Integer.valueOf(0x03C6), "GREEK SMALL LETTER PHI"},
        {Integer.valueOf(0x03C7), "GREEK SMALL LETTER CHI"},
        {Integer.valueOf(0x03C8), "GREEK SMALL LETTER PSI"},
        {Integer.valueOf(0x03C9), "GREEK SMALL LETTER OMEGA"},
        {Integer.valueOf(0x03CA), "GREEK SMALL LETTER IOTA WITH DIALYTIKA"},
        {Integer.valueOf(0x03CB), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA"},
        {Integer.valueOf(0x03CC), "GREEK SMALL LETTER OMICRON WITH TONOS"},
        {Integer.valueOf(0x03CD), "GREEK SMALL LETTER UPSILON WITH TONOS"},
        {Integer.valueOf(0x03CE), "GREEK SMALL LETTER OMEGA WITH TONOS"}
    };

}
409