• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 2002-2010, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.dev.tool.layout;
11 
12 import java.util.Vector;
13 
14 import com.ibm.icu.impl.Utility;
15 import com.ibm.icu.lang.UCharacter;
16 import com.ibm.icu.lang.UScript;
17 import com.ibm.icu.text.CanonicalIterator;
18 import com.ibm.icu.text.UTF16;
19 import com.ibm.icu.text.UnicodeSet;
20 
21 public class CanonicalCharacterData
22 {
23     private static int THRESHOLD = 4;
24 
25     public class Record
26     {
27         // TODO: might want to save arrays of Char32's rather than UTF16 strings...
Record(int character, int script)28         Record(int character, int script)
29         {
30             String char32 = UCharacter.toString(character);
31             CanonicalIterator iterator = new CanonicalIterator(char32);
32             Vector equivs = new Vector();
33 
34             composed = character;
35 
36             for (String equiv = iterator.next(); equiv != null; equiv = iterator.next()) {
37                 // Skip all equivalents of length 1; it's either the original
38                 // character or something like Angstrom for A-Ring, which we don't care about
39                 if (UTF16.countCodePoint(equiv) > 1) {
40                     equivs.add(equiv);
41                 }
42             }
43 
44             int nEquivalents = equivs.size();
45 
46             if (nEquivalents > maxEquivalents[script]) {
47                 maxEquivalents[script] = nEquivalents;
48             }
49 
50             if (nEquivalents > 0) {
51                 equivalents = new String[nEquivalents];
52 
53                 if (nEquivalents > THRESHOLD) {
54                     dumpEquivalents(character, equivs);
55                 }
56 
57                 sortEquivalents(equivalents, equivs);
58             }
59         }
60 
getComposedCharacter()61         public int getComposedCharacter()
62         {
63             return composed;
64         }
65 
countEquivalents()66         public int countEquivalents()
67         {
68             if (equivalents == null) {
69                 return 0;
70             }
71 
72             return equivalents.length;
73         }
74 
getEquivalents()75         public String[] getEquivalents()
76         {
77             return equivalents;
78         }
79 
getEquivalent(int index)80         public String getEquivalent(int index)
81         {
82             if (equivalents == null || index < 0 || index >= equivalents.length) {
83                 return null;
84             }
85 
86             return equivalents[index];
87         }
88 
dumpEquivalents(int character, Vector equivs)89         private void dumpEquivalents(int character, Vector equivs)
90         {
91             int count = equivs.size();
92 
93             System.out.println(Utility.hex(character, 6) + " - " + count + ":");
94 
95             for (int i = 0; i < count; i += 1) {
96                 String equiv = (String) equivs.elementAt(i);
97                 int codePoints = UTF16.countCodePoint(equiv);
98 
99                 for (int c = 0; c < codePoints; c += 1) {
100                     if (c > 0) {
101                         System.out.print(" ");
102                     }
103 
104                     System.out.print(Utility.hex(UTF16.charAt(equiv, c), 6));
105                 }
106 
107                 System.out.println();
108             }
109 
110             System.out.println();
111         }
112 
113         private int composed;
114         private String[] equivalents = null;
115     }
116 
CanonicalCharacterData()117     public CanonicalCharacterData()
118     {
119         // nothing to do...
120     }
121 
add(int character)122     public void add(int character)
123     {
124         int script = UScript.getScript(character);
125         Vector recordVector = recordVectors[script];
126 
127         if (recordVector == null) {
128             recordVector = recordVectors[script] = new Vector();
129         }
130 
131         recordVector.add(new Record(character, script));
132     }
133 
getMaxEquivalents(int script)134     public int getMaxEquivalents(int script)
135     {
136         if (script < 0 || script >= UScript.CODE_LIMIT) {
137             return 0;
138         }
139 
140         return maxEquivalents[script];
141     }
142 
getRecord(int script, int index)143     public Record getRecord(int script, int index)
144     {
145         if (script < 0 || script >= UScript.CODE_LIMIT) {
146             return null;
147         }
148 
149         Vector recordVector = recordVectors[script];
150 
151         if (recordVector == null || index < 0 || index >= recordVector.size()) {
152             return null;
153         }
154 
155         return (Record) recordVector.elementAt(index);
156     }
157 
countRecords(int script)158     public int countRecords(int script)
159     {
160         if (script < 0 || script >= UScript.CODE_LIMIT ||
161             recordVectors[script] == null) {
162             return 0;
163         }
164 
165         return recordVectors[script].size();
166     }
167 
factory(UnicodeSet characterSet)168     public static CanonicalCharacterData factory(UnicodeSet characterSet)
169     {
170         int charCount = characterSet.size();
171         CanonicalCharacterData data = new CanonicalCharacterData();
172 
173         System.out.println("There are " + charCount + " characters with a canonical decomposition.");
174 
175         for (int i = 0; i < charCount; i += 1) {
176             data.add(characterSet.charAt(i));
177         }
178 
179         return data;
180     }
181 
compareEquivalents(String a, String b)182     private static int compareEquivalents(String a, String b)
183     {
184         int result = UTF16.countCodePoint(a) - UTF16.countCodePoint(b);
185 
186         if (result == 0) {
187             return a.compareTo(b);
188         }
189 
190         return result;
191     }
192 
193     //
194     // Straight insertion sort from Knuth vol. III, pg. 81
195     //
sortEquivalents(String[] equivalents, Vector unsorted)196     private static void sortEquivalents(String[] equivalents, Vector unsorted)
197     {
198         int nEquivalents = equivalents.length;
199 
200         for (int e = 0; e < nEquivalents; e += 1) {
201             String v = (String) unsorted.elementAt(e);
202             int i;
203 
204             for (i = e - 1; i >= 0; i -= 1) {
205                 if (compareEquivalents(v, equivalents[i]) >= 0) {
206                   break;
207                 }
208 
209                 equivalents[i + 1] = equivalents[i];
210             }
211 
212             equivalents[i + 1] = v;
213        }
214     }
215 
216     private Vector recordVectors[] = new Vector[UScript.CODE_LIMIT];
217     private int maxEquivalents[] = new int[UScript.CODE_LIMIT];
218 
219 }
220