1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id$ 20 */ 21 22 package org.apache.qetest; 23 import java.io.File; 24 import java.io.FileOutputStream; 25 import java.io.OutputStreamWriter; 26 import java.io.PrintWriter; 27 28 29 /** 30 * Simple utility for writing XML documents from character tables. 31 * 32 * @author scott_boag@lotus.com 33 * @author shane_curcuru@lotus.com 34 * @version $Id$ 35 */ 36 public class CharTables 37 { 38 39 /** 40 * Write a chars table to a file. 41 * 42 * Simply uses new OutputStreamWriter(..., fileencoding). 43 * 44 * @param chars array of Objects, Integer char code and 45 * String description thereof (only including applicable codes) 46 * @param includeUnencoded, or simply don't write them out at all 47 * @param xmlencoding the XML name used in encoding= attr 48 * @param fileencoding the encoding to output to 49 * @param filename to write to 50 * @throws any underlying exceptions 51 */ writeCharTableFile(Object[][] chars, boolean includeUnencoded, String xmlencoding, String fileencoding, String filename)52 public static void writeCharTableFile(Object[][] chars, boolean includeUnencoded, 53 String xmlencoding, String fileencoding, String filename) 54 throws Exception 55 { 56 File f = new File(filename); 57 FileOutputStream fos = new FileOutputStream(f); 58 PrintWriter writer = new PrintWriter(new OutputStreamWriter(fos, fileencoding)); 59 60 writer.println("<?xml version=\"1.0\" encoding=\"" + xmlencoding + "\"?>"); 61 writer.println("<chartables fileencoding=\"" + fileencoding + "\">"); 62 CharTables.writeCharTable(chars, includeUnencoded, xmlencoding, writer); 63 writer.println("</chartables>"); 64 writer.close(); 65 } 66 67 /** 68 * Write a chars table to a stream. 69 * 70 * @param chars array of Objects, Integer char code and 71 * String description thereof (only including applicable codes) 72 * @param includeUnencoded, or simply don't write them out at all 73 * @param encoding the encoding to output to 74 * @param writer where to write to 75 * @throws any underlying exceptions 76 */ writeCharTable(Object[][] chars, boolean includeUnencoded, String encoding, PrintWriter writer)77 public static void writeCharTable(Object[][] chars, boolean includeUnencoded, 78 String encoding, PrintWriter writer) 79 throws Exception 80 { 81 writer.println(CHARS_HEADER + encoding + "\" includeUnencoded=\"" + includeUnencoded + "\">"); 82 int numChars = chars.length; 83 84 for ( int x = 0x20; x <= 0x03CE+4/* 0xD7FF */; x++ ) 85 { 86 int i; 87 for ( i = 0; i < numChars; i++ ) 88 { 89 final int code = ((Integer)(chars[i][0])).intValue(); 90 91 if ( code == x ) 92 { 93 writer.print(CHAR_HEADER + code + CHAR_HEADER2 + chars[i][1] + "\">"); 94 switch ( code ) 95 { 96 case '&': 97 writer.print(C_HEADER); 98 writer.print("&"); 99 writer.print(C_ENDER); 100 break; 101 case '<': 102 writer.print(C_HEADER); 103 writer.print("<"); 104 writer.print(C_ENDER); 105 break; 106 default: 107 writer.print(C_HEADER); 108 writer.print(((char)code)); 109 writer.print(C_ENDER); 110 } 111 writer.print(E_HEADER); 112 writer.print("&#x"); 113 writer.print(Integer.toHexString(code)); 114 writer.print(";"); 115 writer.print(E_ENDER); 116 writer.println(CHAR_ENDER); 117 break; // from for... 118 } 119 } // of for(i... 120 // This character is not provided in the specified encoding 121 if ( includeUnencoded && ( i == numChars )) 122 { 123 writer.print(CHAR_HEADER + x + CHAR_HEADER2 + "not encoded" + "\">"); 124 // Since this character isn't in this encoding, 125 // don't bother writing out the ELEM_C 126 writer.print(E_HEADER); 127 writer.print("&#x"); 128 writer.print(Integer.toHexString(x)); 129 writer.print(";"); 130 writer.print(E_ENDER); 131 writer.println(CHAR_ENDER); 132 } 133 134 }// of for(x... 135 136 writer.println(CHARS_ENDER); 137 writer.flush(); 138 } // of writeCharTable 139 140 141 /** chars elem - the whole table. */ 142 public static final String ELEM_CHARS = "chars"; 143 144 /** chars elem, enc attr - encoding of these chars. */ 145 public static final String ATTR_ENC = "enc"; 146 147 /** Convenience precalculated string. */ 148 public static String CHARS_HEADER = "<" + ELEM_CHARS + " " + ATTR_ENC + "=\""; 149 150 /** Convenience precalculated string. */ 151 public static String CHARS_ENDER = "</" + ELEM_CHARS + ">"; 152 153 /** char elem - a single character. */ 154 public static final String ELEM_CHAR = "char"; 155 156 /** char elem, dec attr - decimal char code. */ 157 public static final String ATTR_DEC = "dec"; 158 159 /** char elem, desc attr - description. */ 160 public static final String ATTR_DESC = "desc"; 161 162 /** Convenience precalculated string. */ 163 public static String CHAR_HEADER = "<" + ELEM_CHAR + " " + ATTR_DEC + "=\""; 164 165 /** Convenience precalculated string. */ 166 public static String CHAR_HEADER2 = "\" " + ATTR_DESC + "=\""; 167 168 /** Convenience precalculated string. */ 169 public static String CHAR_ENDER = "</" + ELEM_CHAR + ">"; 170 171 172 /** c elem - just the character in the encoding. */ 173 public static final String ELEM_C = "c"; 174 175 /** Convenience precalculated string. */ 176 public static String C_HEADER = "<" + ELEM_C + ">"; 177 178 /** Convenience precalculated string. */ 179 public static String C_ENDER = "</" + ELEM_C + ">"; 180 181 182 /** e elem - the entity reference to the character. */ 183 public static final String ELEM_E = "e"; 184 185 /** Convenience precalculated string. */ 186 public static String E_HEADER = "<" + ELEM_E + ">"; 187 188 /** Convenience precalculated string. */ 189 public static String E_ENDER = "</" + ELEM_E + ">"; 190 191 192 /** 193 * Main method to run from the command line; sample usage. 194 * @param args cmd line arguments 195 */ main(String[] args)196 public static void main(String[] args) 197 { 198 String filename = "chartable.xml"; 199 if (args.length >= 1) 200 { 201 filename = args[0]; 202 } 203 String xmlencoding = "ISO-8859-7"; 204 String fileencoding = "ISO8859_7"; 205 try 206 { 207 // Sample usage with greek table, below 208 CharTables.writeCharTableFile(greek, false, xmlencoding, fileencoding, filename); 209 System.out.println("Wrote " + filename + " output in encodings " + xmlencoding + "/" + fileencoding); 210 } 211 catch (Exception e) 212 { 213 e.printStackTrace(); 214 } 215 } 216 217 218 /** Sample data: greek/ISO-8859-7/ISO8859_7 . */ 219 public static final Object greek[][] = 220 { 221 {new Integer(0x0020), "SPACE"} 222 , {new Integer(0x0021), "EXCLAMATION MARK"} 223 , {new Integer(0x0022), "QUOTATION MARK"} 224 , {new Integer(0x0023), "NUMBER SIGN"} 225 , {new Integer(0x0024), "DOLLAR SIGN"} 226 , {new Integer(0x0025), "PERCENT SIGN"} 227 , {new Integer(0x0026), "AMPERSAND"} 228 , {new Integer(0x0027), "APOSTROPHE"} 229 , {new Integer(0x0028), "LEFT PARENTHESIS"} 230 , {new Integer(0x0029), "RIGHT PARENTHESIS"} 231 , {new Integer(0x002A), "ASTERISK"} 232 , {new Integer(0x002B), "PLUS SIGN"} 233 , {new Integer(0x002C), "COMMA"} 234 , {new Integer(0x002D), "HYPHEN-MINUS"} 235 , {new Integer(0x002E), "FULL STOP"} 236 , {new Integer(0x002F), "SOLIDUS"} 237 , {new Integer(0x0030), "DIGIT ZERO"} 238 , {new Integer(0x0031), "DIGIT ONE"} 239 , {new Integer(0x0032), "DIGIT TWO"} 240 , {new Integer(0x0033), "DIGIT THREE"} 241 , {new Integer(0x0034), "DIGIT FOUR"} 242 , {new Integer(0x0035), "DIGIT FIVE"} 243 , {new Integer(0x0036), "DIGIT SIX"} 244 , {new Integer(0x0037), "DIGIT SEVEN"} 245 , {new Integer(0x0038), "DIGIT EIGHT"} 246 , {new Integer(0x0039), "DIGIT NINE"} 247 , {new Integer(0x003A), "COLON"} 248 , {new Integer(0x003B), "SEMICOLON"} 249 , {new Integer(0x003C), "LESS-THAN SIGN"} 250 , {new Integer(0x003D), "EQUALS SIGN"} 251 , {new Integer(0x003E), "GREATER-THAN SIGN"} 252 , {new Integer(0x003F), "QUESTION MARK"} 253 , {new Integer(0x0040), "COMMERCIAL AT"} 254 , {new Integer(0x0041), "LATIN CAPITAL LETTER A"} 255 , {new Integer(0x0042), "LATIN CAPITAL LETTER B"} 256 , {new Integer(0x0043), "LATIN CAPITAL LETTER C"} 257 , {new Integer(0x0044), "LATIN CAPITAL LETTER D"} 258 , {new Integer(0x0045), "LATIN CAPITAL LETTER E"} 259 , {new Integer(0x0046), "LATIN CAPITAL LETTER F"} 260 , {new Integer(0x0047), "LATIN CAPITAL LETTER G"} 261 , {new Integer(0x0048), "LATIN CAPITAL LETTER H"} 262 , {new Integer(0x0049), "LATIN CAPITAL LETTER I"} 263 , {new Integer(0x004A), "LATIN CAPITAL LETTER J"} 264 , {new Integer(0x004B), "LATIN CAPITAL LETTER K"} 265 , {new Integer(0x004C), "LATIN CAPITAL LETTER L"} 266 , {new Integer(0x004D), "LATIN CAPITAL LETTER M"} 267 , {new Integer(0x004E), "LATIN CAPITAL LETTER N"} 268 , {new Integer(0x004F), "LATIN CAPITAL LETTER O"} 269 , {new Integer(0x0050), "LATIN CAPITAL LETTER P"} 270 , {new Integer(0x0051), "LATIN CAPITAL LETTER Q"} 271 , {new Integer(0x0052), "LATIN CAPITAL LETTER R"} 272 , {new Integer(0x0053), "LATIN CAPITAL LETTER S"} 273 , {new Integer(0x0054), "LATIN CAPITAL LETTER T"} 274 , {new Integer(0x0055), "LATIN CAPITAL LETTER U"} 275 , {new Integer(0x0056), "LATIN CAPITAL LETTER V"} 276 , {new Integer(0x0057), "LATIN CAPITAL LETTER W"} 277 , {new Integer(0x0058), "LATIN CAPITAL LETTER X"} 278 , {new Integer(0x0059), "LATIN CAPITAL LETTER Y"} 279 , {new Integer(0x005A), "LATIN CAPITAL LETTER Z"} 280 , {new Integer(0x005B), "LEFT SQUARE BRACKET"} 281 , {new Integer(0x005C), "REVERSE SOLIDUS"} 282 , {new Integer(0x005D), "RIGHT SQUARE BRACKET"} 283 , {new Integer(0x005E), "CIRCUMFLEX ACCENT"} 284 , {new Integer(0x005F), "LOW LINE"} 285 , {new Integer(0x0060), "GRAVE ACCENT"} 286 , {new Integer(0x0061), "LATIN SMALL LETTER A"} 287 , {new Integer(0x0062), "LATIN SMALL LETTER B"} 288 , {new Integer(0x0063), "LATIN SMALL LETTER C"} 289 , {new Integer(0x0064), "LATIN SMALL LETTER D"} 290 , {new Integer(0x0065), "LATIN SMALL LETTER E"} 291 , {new Integer(0x0066), "LATIN SMALL LETTER F"} 292 , {new Integer(0x0067), "LATIN SMALL LETTER G"} 293 , {new Integer(0x0068), "LATIN SMALL LETTER H"} 294 , {new Integer(0x0069), "LATIN SMALL LETTER I"} 295 , {new Integer(0x006A), "LATIN SMALL LETTER J"} 296 , {new Integer(0x006B), "LATIN SMALL LETTER K"} 297 , {new Integer(0x006C), "LATIN SMALL LETTER L"} 298 , {new Integer(0x006D), "LATIN SMALL LETTER M"} 299 , {new Integer(0x006E), "LATIN SMALL LETTER N"} 300 , {new Integer(0x006F), "LATIN SMALL LETTER O"} 301 , {new Integer(0x0070), "LATIN SMALL LETTER P"} 302 , {new Integer(0x0071), "LATIN SMALL LETTER Q"} 303 , {new Integer(0x0072), "LATIN SMALL LETTER R"} 304 , {new Integer(0x0073), "LATIN SMALL LETTER S"} 305 , {new Integer(0x0074), "LATIN SMALL LETTER T"} 306 , {new Integer(0x0075), "LATIN SMALL LETTER U"} 307 , {new Integer(0x0076), "LATIN SMALL LETTER V"} 308 , {new Integer(0x0077), "LATIN SMALL LETTER W"} 309 , {new Integer(0x0078), "LATIN SMALL LETTER X"} 310 , {new Integer(0x0079), "LATIN SMALL LETTER Y"} 311 , {new Integer(0x007A), "LATIN SMALL LETTER Z"} 312 , {new Integer(0x007B), "LEFT CURLY BRACKET"} 313 , {new Integer(0x007C), "VERTICAL LINE"} 314 , {new Integer(0x007D), "RIGHT CURLY BRACKET"} 315 , {new Integer(0x007E), "TILDE"} 316 , {new Integer(0x00A0), "NO-BREAK SPACE"} 317 , {new Integer(0x02BD), "MODIFIER LETTER REVERSED COMMA"} 318 , {new Integer(0x02BC), "MODIFIER LETTER APOSTROPHE"} 319 , {new Integer(0x00A3), "POUND SIGN"} 320 , {new Integer(0x00A6), "BROKEN BAR"} 321 , {new Integer(0x00A7), "SECTION SIGN"} 322 , {new Integer(0x00A8), "DIAERESIS"} 323 , {new Integer(0x00A9), "COPYRIGHT SIGN"} 324 , {new Integer(0x00AB), "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"} 325 , {new Integer(0x00AC), "NOT SIGN"} 326 , {new Integer(0x00AD), "SOFT HYPHEN"} 327 , {new Integer(0x2015), "HORIZONTAL BAR"} 328 , {new Integer(0x00B0), "DEGREE SIGN"} 329 , {new Integer(0x00B1), "PLUS-MINUS SIGN"} 330 , {new Integer(0x00B2), "SUPERSCRIPT TWO"} 331 , {new Integer(0x00B3), "SUPERSCRIPT THREE"} 332 , {new Integer(0x0384), "GREEK TONOS"} 333 , {new Integer(0x0385), "GREEK DIALYTIKA TONOS"} 334 , {new Integer(0x0386), "GREEK CAPITAL LETTER ALPHA WITH TONOS"} 335 , {new Integer(0x00B7), "MIDDLE DOT"} 336 , {new Integer(0x0388), "GREEK CAPITAL LETTER EPSILON WITH TONOS"} 337 , {new Integer(0x0389), "GREEK CAPITAL LETTER ETA WITH TONOS"} 338 , {new Integer(0x038A), "GREEK CAPITAL LETTER IOTA WITH TONOS"} 339 , {new Integer(0x00BB), "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"} 340 , {new Integer(0x038C), "GREEK CAPITAL LETTER OMICRON WITH TONOS"} 341 , {new Integer(0x00BD), "VULGAR FRACTION ONE HALF"} 342 , {new Integer(0x038E), "GREEK CAPITAL LETTER UPSILON WITH TONOS"} 343 , {new Integer(0x038F), "GREEK CAPITAL LETTER OMEGA WITH TONOS"} 344 , {new Integer(0x0390), "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"} 345 , {new Integer(0x0391), "GREEK CAPITAL LETTER ALPHA"} 346 , {new Integer(0x0392), "GREEK CAPITAL LETTER BETA"} 347 , {new Integer(0x0393), "GREEK CAPITAL LETTER GAMMA"} 348 , {new Integer(0x0394), "GREEK CAPITAL LETTER DELTA"} 349 , {new Integer(0x0395), "GREEK CAPITAL LETTER EPSILON"} 350 , {new Integer(0x0396), "GREEK CAPITAL LETTER ZETA"} 351 , {new Integer(0x0397), "GREEK CAPITAL LETTER ETA"} 352 , {new Integer(0x0398), "GREEK CAPITAL LETTER THETA"} 353 , {new Integer(0x0399), "GREEK CAPITAL LETTER IOTA"} 354 , {new Integer(0x039A), "GREEK CAPITAL LETTER KAPPA"} 355 , {new Integer(0x039B), "GREEK CAPITAL LETTER LAMDA"} 356 , {new Integer(0x039C), "GREEK CAPITAL LETTER MU"} 357 , {new Integer(0x039D), "GREEK CAPITAL LETTER NU"} 358 , {new Integer(0x039E), "GREEK CAPITAL LETTER XI"} 359 , {new Integer(0x039F), "GREEK CAPITAL LETTER OMICRON"} 360 , {new Integer(0x03A0), "GREEK CAPITAL LETTER PI"} 361 , {new Integer(0x03A1), "GREEK CAPITAL LETTER RHO"} 362 , {new Integer(0x03A3), "GREEK CAPITAL LETTER SIGMA"} 363 , {new Integer(0x03A4), "GREEK CAPITAL LETTER TAU"} 364 , {new Integer(0x03A5), "GREEK CAPITAL LETTER UPSILON"} 365 , {new Integer(0x03A6), "GREEK CAPITAL LETTER PHI"} 366 , {new Integer(0x03A7), "GREEK CAPITAL LETTER CHI"} 367 , {new Integer(0x03A8), "GREEK CAPITAL LETTER PSI"} 368 , {new Integer(0x03A9), "GREEK CAPITAL LETTER OMEGA"} 369 , {new Integer(0x03AA), "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"} 370 , {new Integer(0x03AB), "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"} 371 , {new Integer(0x03AC), "GREEK SMALL LETTER ALPHA WITH TONOS"} 372 , {new Integer(0x03AD), "GREEK SMALL LETTER EPSILON WITH TONOS"} 373 , {new Integer(0x03AE), "GREEK SMALL LETTER ETA WITH TONOS"} 374 , {new Integer(0x03AF), "GREEK SMALL LETTER IOTA WITH TONOS"} 375 , {new Integer(0x03B0), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"} 376 , {new Integer(0x03B1), "GREEK SMALL LETTER ALPHA"} 377 , {new Integer(0x03B2), "GREEK SMALL LETTER BETA"} 378 , {new Integer(0x03B3), "GREEK SMALL LETTER GAMMA"} 379 , {new Integer(0x03B4), "GREEK SMALL LETTER DELTA"} 380 , {new Integer(0x03B5), "GREEK SMALL LETTER EPSILON"} 381 , {new Integer(0x03B6), "GREEK SMALL LETTER ZETA"} 382 , {new Integer(0x03B7), "GREEK SMALL LETTER ETA"} 383 , {new Integer(0x03B8), "GREEK SMALL LETTER THETA"} 384 , {new Integer(0x03B9), "GREEK SMALL LETTER IOTA"} 385 , {new Integer(0x03BA), "GREEK SMALL LETTER KAPPA"} 386 , {new Integer(0x03BB), "GREEK SMALL LETTER LAMDA"} 387 , {new Integer(0x03BC), "GREEK SMALL LETTER MU"} 388 , {new Integer(0x03BD), "GREEK SMALL LETTER NU"} 389 , {new Integer(0x03BE), "GREEK SMALL LETTER XI"} 390 , {new Integer(0x03BF), "GREEK SMALL LETTER OMICRON"} 391 , {new Integer(0x03C0), "GREEK SMALL LETTER PI"} 392 , {new Integer(0x03C1), "GREEK SMALL LETTER RHO"} 393 , {new Integer(0x03C2), "GREEK SMALL LETTER FINAL SIGMA"} 394 , {new Integer(0x03C3), "GREEK SMALL LETTER SIGMA"} 395 , {new Integer(0x03C4), "GREEK SMALL LETTER TAU"} 396 , {new Integer(0x03C5), "GREEK SMALL LETTER UPSILON"} 397 , {new Integer(0x03C6), "GREEK SMALL LETTER PHI"} 398 , {new Integer(0x03C7), "GREEK SMALL LETTER CHI"} 399 , {new Integer(0x03C8), "GREEK SMALL LETTER PSI"} 400 , {new Integer(0x03C9), "GREEK SMALL LETTER OMEGA"} 401 , {new Integer(0x03CA), "GREEK SMALL LETTER IOTA WITH DIALYTIKA"} 402 , {new Integer(0x03CB), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA"} 403 , {new Integer(0x03CC), "GREEK SMALL LETTER OMICRON WITH TONOS"} 404 , {new Integer(0x03CD), "GREEK SMALL LETTER UPSILON WITH TONOS"} 405 , {new Integer(0x03CE), "GREEK SMALL LETTER OMEGA WITH TONOS"} 406 }; 407 408 } 409