/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id$ */ package org.apache.qetest; import java.io.File; import java.io.FileOutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; /** * Simple utility for writing XML documents from character tables. * * @author scott_boag@lotus.com * @author shane_curcuru@lotus.com * @version $Id$ */ public class CharTables { /** * Write a chars table to a file. * * Simply uses new OutputStreamWriter(..., fileencoding). * * @param chars array of Objects, Integer char code and * String description thereof (only including applicable codes) * @param includeUnencoded, or simply don't write them out at all * @param xmlencoding the XML name used in encoding= attr * @param fileencoding the encoding to output to * @param filename to write to * @throws any underlying exceptions */ public static void writeCharTableFile(Object[][] chars, boolean includeUnencoded, String xmlencoding, String fileencoding, String filename) throws Exception { File f = new File(filename); FileOutputStream fos = new FileOutputStream(f); PrintWriter writer = new PrintWriter(new OutputStreamWriter(fos, fileencoding)); writer.println(""); writer.println(""); CharTables.writeCharTable(chars, includeUnencoded, xmlencoding, writer); writer.println(""); writer.close(); } /** * Write a chars table to a stream. * * @param chars array of Objects, Integer char code and * String description thereof (only including applicable codes) * @param includeUnencoded, or simply don't write them out at all * @param encoding the encoding to output to * @param writer where to write to * @throws any underlying exceptions */ public static void writeCharTable(Object[][] chars, boolean includeUnencoded, String encoding, PrintWriter writer) throws Exception { writer.println(CHARS_HEADER + encoding + "\" includeUnencoded=\"" + includeUnencoded + "\">"); int numChars = chars.length; for ( int x = 0x20; x <= 0x03CE+4/* 0xD7FF */; x++ ) { int i; for ( i = 0; i < numChars; i++ ) { final int code = ((Integer)(chars[i][0])).intValue(); if ( code == x ) { writer.print(CHAR_HEADER + code + CHAR_HEADER2 + chars[i][1] + "\">"); switch ( code ) { case '&': writer.print(C_HEADER); writer.print("&"); writer.print(C_ENDER); break; case '<': writer.print(C_HEADER); writer.print("<"); writer.print(C_ENDER); break; default: writer.print(C_HEADER); writer.print(((char)code)); writer.print(C_ENDER); } writer.print(E_HEADER); writer.print("&#x"); writer.print(Integer.toHexString(code)); writer.print(";"); writer.print(E_ENDER); writer.println(CHAR_ENDER); break; // from for... } } // of for(i... // This character is not provided in the specified encoding if ( includeUnencoded && ( i == numChars )) { writer.print(CHAR_HEADER + x + CHAR_HEADER2 + "not encoded" + "\">"); // Since this character isn't in this encoding, // don't bother writing out the ELEM_C writer.print(E_HEADER); writer.print("&#x"); writer.print(Integer.toHexString(x)); writer.print(";"); writer.print(E_ENDER); writer.println(CHAR_ENDER); } }// of for(x... writer.println(CHARS_ENDER); writer.flush(); } // of writeCharTable /** chars elem - the whole table. */ public static final String ELEM_CHARS = "chars"; /** chars elem, enc attr - encoding of these chars. */ public static final String ATTR_ENC = "enc"; /** Convenience precalculated string. */ public static String CHARS_HEADER = "<" + ELEM_CHARS + " " + ATTR_ENC + "=\""; /** Convenience precalculated string. */ public static String CHARS_ENDER = ""; /** char elem - a single character. */ public static final String ELEM_CHAR = "char"; /** char elem, dec attr - decimal char code. */ public static final String ATTR_DEC = "dec"; /** char elem, desc attr - description. */ public static final String ATTR_DESC = "desc"; /** Convenience precalculated string. */ public static String CHAR_HEADER = "<" + ELEM_CHAR + " " + ATTR_DEC + "=\""; /** Convenience precalculated string. */ public static String CHAR_HEADER2 = "\" " + ATTR_DESC + "=\""; /** Convenience precalculated string. */ public static String CHAR_ENDER = ""; /** c elem - just the character in the encoding. */ public static final String ELEM_C = "c"; /** Convenience precalculated string. */ public static String C_HEADER = "<" + ELEM_C + ">"; /** Convenience precalculated string. */ public static String C_ENDER = ""; /** e elem - the entity reference to the character. */ public static final String ELEM_E = "e"; /** Convenience precalculated string. */ public static String E_HEADER = "<" + ELEM_E + ">"; /** Convenience precalculated string. */ public static String E_ENDER = ""; /** * Main method to run from the command line; sample usage. * @param args cmd line arguments */ public static void main(String[] args) { String filename = "chartable.xml"; if (args.length >= 1) { filename = args[0]; } String xmlencoding = "ISO-8859-7"; String fileencoding = "ISO8859_7"; try { // Sample usage with greek table, below CharTables.writeCharTableFile(greek, false, xmlencoding, fileencoding, filename); System.out.println("Wrote " + filename + " output in encodings " + xmlencoding + "/" + fileencoding); } catch (Exception e) { e.printStackTrace(); } } /** Sample data: greek/ISO-8859-7/ISO8859_7 . */ public static final Object greek[][] = { {new Integer(0x0020), "SPACE"} , {new Integer(0x0021), "EXCLAMATION MARK"} , {new Integer(0x0022), "QUOTATION MARK"} , {new Integer(0x0023), "NUMBER SIGN"} , {new Integer(0x0024), "DOLLAR SIGN"} , {new Integer(0x0025), "PERCENT SIGN"} , {new Integer(0x0026), "AMPERSAND"} , {new Integer(0x0027), "APOSTROPHE"} , {new Integer(0x0028), "LEFT PARENTHESIS"} , {new Integer(0x0029), "RIGHT PARENTHESIS"} , {new Integer(0x002A), "ASTERISK"} , {new Integer(0x002B), "PLUS SIGN"} , {new Integer(0x002C), "COMMA"} , {new Integer(0x002D), "HYPHEN-MINUS"} , {new Integer(0x002E), "FULL STOP"} , {new Integer(0x002F), "SOLIDUS"} , {new Integer(0x0030), "DIGIT ZERO"} , {new Integer(0x0031), "DIGIT ONE"} , {new Integer(0x0032), "DIGIT TWO"} , {new Integer(0x0033), "DIGIT THREE"} , {new Integer(0x0034), "DIGIT FOUR"} , {new Integer(0x0035), "DIGIT FIVE"} , {new Integer(0x0036), "DIGIT SIX"} , {new Integer(0x0037), "DIGIT SEVEN"} , {new Integer(0x0038), "DIGIT EIGHT"} , {new Integer(0x0039), "DIGIT NINE"} , {new Integer(0x003A), "COLON"} , {new Integer(0x003B), "SEMICOLON"} , {new Integer(0x003C), "LESS-THAN SIGN"} , {new Integer(0x003D), "EQUALS SIGN"} , {new Integer(0x003E), "GREATER-THAN SIGN"} , {new Integer(0x003F), "QUESTION MARK"} , {new Integer(0x0040), "COMMERCIAL AT"} , {new Integer(0x0041), "LATIN CAPITAL LETTER A"} , {new Integer(0x0042), "LATIN CAPITAL LETTER B"} , {new Integer(0x0043), "LATIN CAPITAL LETTER C"} , {new Integer(0x0044), "LATIN CAPITAL LETTER D"} , {new Integer(0x0045), "LATIN CAPITAL LETTER E"} , {new Integer(0x0046), "LATIN CAPITAL LETTER F"} , {new Integer(0x0047), "LATIN CAPITAL LETTER G"} , {new Integer(0x0048), "LATIN CAPITAL LETTER H"} , {new Integer(0x0049), "LATIN CAPITAL LETTER I"} , {new Integer(0x004A), "LATIN CAPITAL LETTER J"} , {new Integer(0x004B), "LATIN CAPITAL LETTER K"} , {new Integer(0x004C), "LATIN CAPITAL LETTER L"} , {new Integer(0x004D), "LATIN CAPITAL LETTER M"} , {new Integer(0x004E), "LATIN CAPITAL LETTER N"} , {new Integer(0x004F), "LATIN CAPITAL LETTER O"} , {new Integer(0x0050), "LATIN CAPITAL LETTER P"} , {new Integer(0x0051), "LATIN CAPITAL LETTER Q"} , {new Integer(0x0052), "LATIN CAPITAL LETTER R"} , {new Integer(0x0053), "LATIN CAPITAL LETTER S"} , {new Integer(0x0054), "LATIN CAPITAL LETTER T"} , {new Integer(0x0055), "LATIN CAPITAL LETTER U"} , {new Integer(0x0056), "LATIN CAPITAL LETTER V"} , {new Integer(0x0057), "LATIN CAPITAL LETTER W"} , {new Integer(0x0058), "LATIN CAPITAL LETTER X"} , {new Integer(0x0059), "LATIN CAPITAL LETTER Y"} , {new Integer(0x005A), "LATIN CAPITAL LETTER Z"} , {new Integer(0x005B), "LEFT SQUARE BRACKET"} , {new Integer(0x005C), "REVERSE SOLIDUS"} , {new Integer(0x005D), "RIGHT SQUARE BRACKET"} , {new Integer(0x005E), "CIRCUMFLEX ACCENT"} , {new Integer(0x005F), "LOW LINE"} , {new Integer(0x0060), "GRAVE ACCENT"} , {new Integer(0x0061), "LATIN SMALL LETTER A"} , {new Integer(0x0062), "LATIN SMALL LETTER B"} , {new Integer(0x0063), "LATIN SMALL LETTER C"} , {new Integer(0x0064), "LATIN SMALL LETTER D"} , {new Integer(0x0065), "LATIN SMALL LETTER E"} , {new Integer(0x0066), "LATIN SMALL LETTER F"} , {new Integer(0x0067), "LATIN SMALL LETTER G"} , {new Integer(0x0068), "LATIN SMALL LETTER H"} , {new Integer(0x0069), "LATIN SMALL LETTER I"} , {new Integer(0x006A), "LATIN SMALL LETTER J"} , {new Integer(0x006B), "LATIN SMALL LETTER K"} , {new Integer(0x006C), "LATIN SMALL LETTER L"} , {new Integer(0x006D), "LATIN SMALL LETTER M"} , {new Integer(0x006E), "LATIN SMALL LETTER N"} , {new Integer(0x006F), "LATIN SMALL LETTER O"} , {new Integer(0x0070), "LATIN SMALL LETTER P"} , {new Integer(0x0071), "LATIN SMALL LETTER Q"} , {new Integer(0x0072), "LATIN SMALL LETTER R"} , {new Integer(0x0073), "LATIN SMALL LETTER S"} , {new Integer(0x0074), "LATIN SMALL LETTER T"} , {new Integer(0x0075), "LATIN SMALL LETTER U"} , {new Integer(0x0076), "LATIN SMALL LETTER V"} , {new Integer(0x0077), "LATIN SMALL LETTER W"} , {new Integer(0x0078), "LATIN SMALL LETTER X"} , {new Integer(0x0079), "LATIN SMALL LETTER Y"} , {new Integer(0x007A), "LATIN SMALL LETTER Z"} , {new Integer(0x007B), "LEFT CURLY BRACKET"} , {new Integer(0x007C), "VERTICAL LINE"} , {new Integer(0x007D), "RIGHT CURLY BRACKET"} , {new Integer(0x007E), "TILDE"} , {new Integer(0x00A0), "NO-BREAK SPACE"} , {new Integer(0x02BD), "MODIFIER LETTER REVERSED COMMA"} , {new Integer(0x02BC), "MODIFIER LETTER APOSTROPHE"} , {new Integer(0x00A3), "POUND SIGN"} , {new Integer(0x00A6), "BROKEN BAR"} , {new Integer(0x00A7), "SECTION SIGN"} , {new Integer(0x00A8), "DIAERESIS"} , {new Integer(0x00A9), "COPYRIGHT SIGN"} , {new Integer(0x00AB), "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"} , {new Integer(0x00AC), "NOT SIGN"} , {new Integer(0x00AD), "SOFT HYPHEN"} , {new Integer(0x2015), "HORIZONTAL BAR"} , {new Integer(0x00B0), "DEGREE SIGN"} , {new Integer(0x00B1), "PLUS-MINUS SIGN"} , {new Integer(0x00B2), "SUPERSCRIPT TWO"} , {new Integer(0x00B3), "SUPERSCRIPT THREE"} , {new Integer(0x0384), "GREEK TONOS"} , {new Integer(0x0385), "GREEK DIALYTIKA TONOS"} , {new Integer(0x0386), "GREEK CAPITAL LETTER ALPHA WITH TONOS"} , {new Integer(0x00B7), "MIDDLE DOT"} , {new Integer(0x0388), "GREEK CAPITAL LETTER EPSILON WITH TONOS"} , {new Integer(0x0389), "GREEK CAPITAL LETTER ETA WITH TONOS"} , {new Integer(0x038A), "GREEK CAPITAL LETTER IOTA WITH TONOS"} , {new Integer(0x00BB), "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK"} , {new Integer(0x038C), "GREEK CAPITAL LETTER OMICRON WITH TONOS"} , {new Integer(0x00BD), "VULGAR FRACTION ONE HALF"} , {new Integer(0x038E), "GREEK CAPITAL LETTER UPSILON WITH TONOS"} , {new Integer(0x038F), "GREEK CAPITAL LETTER OMEGA WITH TONOS"} , {new Integer(0x0390), "GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS"} , {new Integer(0x0391), "GREEK CAPITAL LETTER ALPHA"} , {new Integer(0x0392), "GREEK CAPITAL LETTER BETA"} , {new Integer(0x0393), "GREEK CAPITAL LETTER GAMMA"} , {new Integer(0x0394), "GREEK CAPITAL LETTER DELTA"} , {new Integer(0x0395), "GREEK CAPITAL LETTER EPSILON"} , {new Integer(0x0396), "GREEK CAPITAL LETTER ZETA"} , {new Integer(0x0397), "GREEK CAPITAL LETTER ETA"} , {new Integer(0x0398), "GREEK CAPITAL LETTER THETA"} , {new Integer(0x0399), "GREEK CAPITAL LETTER IOTA"} , {new Integer(0x039A), "GREEK CAPITAL LETTER KAPPA"} , {new Integer(0x039B), "GREEK CAPITAL LETTER LAMDA"} , {new Integer(0x039C), "GREEK CAPITAL LETTER MU"} , {new Integer(0x039D), "GREEK CAPITAL LETTER NU"} , {new Integer(0x039E), "GREEK CAPITAL LETTER XI"} , {new Integer(0x039F), "GREEK CAPITAL LETTER OMICRON"} , {new Integer(0x03A0), "GREEK CAPITAL LETTER PI"} , {new Integer(0x03A1), "GREEK CAPITAL LETTER RHO"} , {new Integer(0x03A3), "GREEK CAPITAL LETTER SIGMA"} , {new Integer(0x03A4), "GREEK CAPITAL LETTER TAU"} , {new Integer(0x03A5), "GREEK CAPITAL LETTER UPSILON"} , {new Integer(0x03A6), "GREEK CAPITAL LETTER PHI"} , {new Integer(0x03A7), "GREEK CAPITAL LETTER CHI"} , {new Integer(0x03A8), "GREEK CAPITAL LETTER PSI"} , {new Integer(0x03A9), "GREEK CAPITAL LETTER OMEGA"} , {new Integer(0x03AA), "GREEK CAPITAL LETTER IOTA WITH DIALYTIKA"} , {new Integer(0x03AB), "GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA"} , {new Integer(0x03AC), "GREEK SMALL LETTER ALPHA WITH TONOS"} , {new Integer(0x03AD), "GREEK SMALL LETTER EPSILON WITH TONOS"} , {new Integer(0x03AE), "GREEK SMALL LETTER ETA WITH TONOS"} , {new Integer(0x03AF), "GREEK SMALL LETTER IOTA WITH TONOS"} , {new Integer(0x03B0), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS"} , {new Integer(0x03B1), "GREEK SMALL LETTER ALPHA"} , {new Integer(0x03B2), "GREEK SMALL LETTER BETA"} , {new Integer(0x03B3), "GREEK SMALL LETTER GAMMA"} , {new Integer(0x03B4), "GREEK SMALL LETTER DELTA"} , {new Integer(0x03B5), "GREEK SMALL LETTER EPSILON"} , {new Integer(0x03B6), "GREEK SMALL LETTER ZETA"} , {new Integer(0x03B7), "GREEK SMALL LETTER ETA"} , {new Integer(0x03B8), "GREEK SMALL LETTER THETA"} , {new Integer(0x03B9), "GREEK SMALL LETTER IOTA"} , {new Integer(0x03BA), "GREEK SMALL LETTER KAPPA"} , {new Integer(0x03BB), "GREEK SMALL LETTER LAMDA"} , {new Integer(0x03BC), "GREEK SMALL LETTER MU"} , {new Integer(0x03BD), "GREEK SMALL LETTER NU"} , {new Integer(0x03BE), "GREEK SMALL LETTER XI"} , {new Integer(0x03BF), "GREEK SMALL LETTER OMICRON"} , {new Integer(0x03C0), "GREEK SMALL LETTER PI"} , {new Integer(0x03C1), "GREEK SMALL LETTER RHO"} , {new Integer(0x03C2), "GREEK SMALL LETTER FINAL SIGMA"} , {new Integer(0x03C3), "GREEK SMALL LETTER SIGMA"} , {new Integer(0x03C4), "GREEK SMALL LETTER TAU"} , {new Integer(0x03C5), "GREEK SMALL LETTER UPSILON"} , {new Integer(0x03C6), "GREEK SMALL LETTER PHI"} , {new Integer(0x03C7), "GREEK SMALL LETTER CHI"} , {new Integer(0x03C8), "GREEK SMALL LETTER PSI"} , {new Integer(0x03C9), "GREEK SMALL LETTER OMEGA"} , {new Integer(0x03CA), "GREEK SMALL LETTER IOTA WITH DIALYTIKA"} , {new Integer(0x03CB), "GREEK SMALL LETTER UPSILON WITH DIALYTIKA"} , {new Integer(0x03CC), "GREEK SMALL LETTER OMICRON WITH TONOS"} , {new Integer(0x03CD), "GREEK SMALL LETTER UPSILON WITH TONOS"} , {new Integer(0x03CE), "GREEK SMALL LETTER OMEGA WITH TONOS"} }; }