• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (c) 2001-2010, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/29/2001  aliu        Creation.
10 *   06/26/2002  aliu        Moved to com.ibm.icu.dev.tool.translit
11 **********************************************************************
12 */
13 package com.ibm.icu.dev.tool.translit;
14 import java.io.File;
15 import java.io.FileOutputStream;
16 import java.io.IOException;
17 import java.io.OutputStreamWriter;
18 import java.io.PrintWriter;
19 import java.util.Enumeration;
20 
21 import com.ibm.icu.text.Normalizer;
22 import com.ibm.icu.text.Transliterator;
23 import com.ibm.icu.text.UnicodeSet;
24 
25 /**
26  * Class that generates source set information for a transliterator.
27  *
28  * To run, use:
29  *
30  *   java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
31  *
32  * Output is produced in the command console, and a file with more detail is also written.
33  *
34  * To see if it works, use:
35  *
36  *   java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
37  *
38  * and
39  *
40  *   java com.ibm.icu.dev.demo.translit.Demo
41  */
42 public class SourceSet {
43 
main(String[] args)44     public static void main(String[] args) throws IOException {
45         if (args.length == 0) {
46             // Compute and display the source sets for all system
47             // transliterators.
48             for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
49                 String ID = (String) e.nextElement();
50                 showSourceSet(ID, Normalizer.NONE, false);
51             }
52         } else {
53             // Usage: ID [NFKD | NFD] [lower]
54             Normalizer.Mode m = Normalizer.NONE;
55             boolean lowerFirst = false;
56             if (args.length >= 2) {
57                 if (args[1].equalsIgnoreCase("NFD")) {
58                     m = Normalizer.NFD;
59                 } else if (args[1].equalsIgnoreCase("NFKD")) {
60                     m = Normalizer.NFKD;
61                 } else {
62                     usage();
63                 }
64             }
65             if (args.length >= 3) {
66                 if (args[2].equalsIgnoreCase("lower")) {
67                     lowerFirst = true;
68                 } else {
69                     usage();
70                 }
71             }
72             if (args.length > 3) {
73                 usage();
74             }
75             showSourceSet(args[0], m, lowerFirst);
76         }
77     }
78 
showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst)79     static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException {
80         File f = new File("UnicodeSetClosure.txt");
81         String filename = f.getCanonicalFile().toString();
82         out = new PrintWriter(
83             new OutputStreamWriter(
84                 new FileOutputStream(filename), "UTF-8"));
85         out.print('\uFEFF'); // BOM
86         System.out.println();
87         System.out.println("Writing " + filename);
88         Transliterator t = Transliterator.getInstance(ID);
89         showSourceSetAux(t, m, lowerFirst, true);
90         showSourceSetAux(t.getInverse(), m, lowerFirst, false);
91         out.close();
92     }
93 
94     static PrintWriter out;
95 
showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward)96     static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) {
97         UnicodeSet sourceSet = t.getSourceSet();
98         if (m != Normalizer.NONE || lowerFirst) {
99             UnicodeSetClosure.close(sourceSet, m, lowerFirst);
100         }
101         System.out.println(t.getID() + ": " +
102                            sourceSet.toPattern(true));
103         out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE"));
104         out.println(":: "
105             + (forward ? "" : "( ")
106             + sourceSet.toPattern(true)
107             + (forward ? "" : " )")
108             + " ;");
109         out.println("# Unicode: " + sourceSet.toPattern(false));
110         out.println();
111     }
112 
usage()113     static void usage() {
114         System.err.println("Usage: ID [ NFD|NFKD [lower] ]");
115         System.exit(1);
116     }
117 }
118