1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2002-2010, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.tool.translit; 10 11 import java.io.FileOutputStream; 12 import java.io.OutputStreamWriter; 13 import java.util.Hashtable; 14 15 import com.ibm.icu.impl.Utility; 16 import com.ibm.icu.lang.UCharacter; 17 import com.ibm.icu.text.Transliterator; 18 import com.ibm.icu.text.UTF16; 19 import com.ibm.icu.text.UnicodeSet; 20 import com.ibm.icu.text.UnicodeSetIterator; 21 /** 22 * @author ram 23 * 24 * To change this generated comment edit the template variable "typecomment": 25 * Window>Preferences>Java>Templates. 26 * To enable and disable the creation of type comments go to 27 * Window>Preferences>Java>Code Generation.7F 28 */ 29 public class WriteIndicCharts { 30 main(String[] args)31 public static void main(String[] args){ 32 writeIICharts(); 33 } 34 35 36 static String header = "<html>\n" + 37 " <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+ 38 " Inter-Indic Transliteration Comparison chart"+ 39 " </head>\n"+ 40 " <body bgcolor=#FFFFFF>\n"+ 41 " <table border=1 width=100% >\n"+ 42 " <tr>\n"+ 43 " <th width=9%>Inter-Indic</th>\n"+ 44 " <th width=9%>Latin</th>\n"+ 45 " <th width=9%>Devanagari</th>\n"+ 46 " <th width=9%>Bengali</th>\n"+ 47 " <th width=9%>Gurmukhi</th>\n"+ 48 " <th width=9%>Gujarati</th>\n"+ 49 " <th width=9%>Oriya</th>\n"+ 50 " <th width=9%>Tamil</th>\n"+ 51 " <th width=9%>Telugu</th>\n"+ 52 " <th width=9%>Kannada</th>\n"+ 53 " <th width=9%>Malayalam</th>\n"+ 54 " </tr>\n"; 55 static String footer = " </table>\n"+ 56 " </body>\n" + 57 "</html>\n"; 58 59 static UnicodeSet deva = new UnicodeSet("[:deva:]"); 60 static UnicodeSet beng = new UnicodeSet("[:beng:]"); 61 static UnicodeSet gujr = new UnicodeSet("[:gujr:]"); 62 static UnicodeSet guru = new UnicodeSet("[:guru:]"); 63 static UnicodeSet orya = new UnicodeSet("[:orya:]"); 64 static UnicodeSet taml = new UnicodeSet("[:taml:]"); 65 static UnicodeSet telu = new UnicodeSet("[:telu:]"); 66 static UnicodeSet knda = new UnicodeSet("[:knda:]"); 67 static UnicodeSet mlym = new UnicodeSet("[:mlym:]"); 68 static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]"); 69 writeIICharts()70 public static void writeIICharts(){ 71 try{ 72 Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali"); 73 Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi"); 74 Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati"); 75 Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya"); 76 Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil"); 77 Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu"); 78 Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada"); 79 Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam"); 80 Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari"); 81 Transliterator t10 = Transliterator.getInstance("InterIndic-Latin"); 82 //UnicodeSetIterator sIter = new UnicodeSetIterator(deva); 83 84 for(int i=0x00;i<=0x80;i++){ 85 String[] arr = new String[10]; 86 arr[0]=UTF16.valueOf(i+ 0xE000); 87 table.put(UTF16.valueOf(i),arr); 88 } 89 90 OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8"); 91 92 os.write(header); 93 94 writeIICharts(t9,0x0900,1); 95 writeIICharts(t1,0x0980,2); 96 writeIICharts(t2,0x0A00,3); 97 writeIICharts(t3,0x0A80,4); 98 writeIICharts(t4,0x0B00,5); 99 writeIICharts(t5,0x0B80,6); 100 writeIICharts(t6,0x0c00,7); 101 writeIICharts(t7,0x0C80,8); 102 writeIICharts(t8,0x0D00,9); 103 104 for(int i=0x00;i<=0x80;i++){ 105 String[] temp = (String[])table.get(UTF16.valueOf(i)); 106 boolean write = false; 107 for(int k=1;k<temp.length && temp[k]!=null;k++){ 108 if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0 || 109 temp[k].indexOf(":UNASSIGNED")<0){ 110 write = true; 111 } 112 } 113 if(write){ 114 os.write(" <tr>\n"); 115 for(int j=0; j<temp.length;j++){ 116 if(temp[j]!=null){ 117 boolean fallback=false; 118 boolean unassigned=false; 119 boolean unmapped = false; 120 boolean consumed =false; 121 String str = temp[j]; 122 123 if(temp[j].indexOf(":FALLBACK")>=0){ 124 str = temp[j].substring(0,temp[j].indexOf(":")); 125 fallback=true; 126 // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 127 } 128 if(temp[j].indexOf(":UNASSIGNED")>=0){ 129 str = temp[j].substring(0,temp[j].indexOf(":")); 130 unassigned=true; 131 } 132 133 if(temp[j].indexOf(":UNMAPPED")>=0){ 134 str = temp[j].substring(0,temp[j].indexOf(":")); 135 unmapped=true; 136 } 137 if(temp[j].indexOf(":CONSUMED")>=0){ 138 str = temp[j].substring(0,temp[j].indexOf(":")); 139 consumed=true; 140 } 141 142 String name; 143 StringBuffer nameBuf=new StringBuffer(); 144 for(int f=0; f<str.length();f++){ 145 if(f>0){ nameBuf.append("+");} 146 nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f))); 147 } 148 name = nameBuf.toString(); 149 if(fallback){ 150 151 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){ 152 os.write(" <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 153 }else{ 154 os.write(" <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 155 } 156 }else if(unmapped){ 157 os.write(" <td bgcolor=#FF9999 align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 158 }else if(unassigned){ 159 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){ 160 os.write(" <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 161 }else{ 162 os.write(" <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 163 } 164 }else if(consumed){ 165 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){ 166 os.write(" <td width=9% bgcolor=#FFFF55 align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 167 }else{ 168 os.write(" <td width=9% bgcolor=#FFFF55 align=center title=\""+""+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 169 } 170 }else if(name.indexOf("private")!=-1){ 171 String s = t10.transliterate(str); 172 os.write(" <td width=9% bgcolor=#FFBBBB align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 173 if(!s.equals(str)){ 174 os.write(" <td width=9% bgcolor=#CCEEDD align=center>"+s +"</td>"); 175 }else{ 176 os.write(" <td width=9% bgcolor=#CCEEDD align=center> </td>"); 177 } 178 }else{ 179 os.write(" <td width=9% align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 180 } 181 }else{ 182 os.write(" <td width=9% > </td>\n"); 183 } 184 } 185 os.write(" </tr>\n"); 186 } 187 } 188 os.write(footer); 189 os.close(); 190 }catch( Exception e){ 191 e.printStackTrace(); 192 } 193 } writeCharts()194 public static void writeCharts(){ 195 try{ 196 Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali"); 197 Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi"); 198 Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati"); 199 Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya"); 200 Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil"); 201 Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu"); 202 Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada"); 203 Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam"); 204 Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari"); 205 206 //UnicodeSetIterator sIter = new UnicodeSetIterator(deva); 207 208 for(int i=0x0900;i<=0x097F;i++){ 209 String[] arr = new String[10]; 210 arr[0]=UTF16.valueOf((i&0xFF) + 0xE000); 211 table.put(UTF16.valueOf(i),arr); 212 } 213 214 OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8"); 215 216 os.write(header); 217 /* 218 writeCharts(t1,beng,1); 219 writeCharts(t2,guru,2); 220 writeCharts(t3,gujr,3); 221 writeCharts(t4,orya,4); 222 writeCharts(t5,taml,5); 223 writeCharts(t6,telu,6); 224 writeCharts(t7,knda,7); 225 writeCharts(t8,mlym,8); 226 */ 227 /* 228 writeCharts(t9,0x0900,1); 229 writeCharts(t1,0x0980,2); 230 writeCharts(t2,0x0A00,3); 231 writeCharts(t3,0x0A80,4); 232 writeCharts(t4,0x0B00,5); 233 writeCharts(t5,0x0B80,6); 234 writeCharts(t6,0x0c00,7); 235 writeCharts(t7,0x0C80,8); 236 writeCharts(t8,0x0D00,9); 237 */ 238 writeIICharts(t9,0x0900,1); 239 writeIICharts(t1,0x0980,2); 240 writeIICharts(t2,0x0A00,3); 241 writeIICharts(t3,0x0A80,4); 242 writeIICharts(t4,0x0B00,5); 243 writeIICharts(t5,0x0B80,6); 244 writeIICharts(t6,0x0c00,7); 245 writeIICharts(t7,0x0C80,8); 246 writeIICharts(t8,0x0D00,9); 247 for(int i=0x0900;i<=0x097F;i++){ 248 String[] temp = (String[])table.get(UTF16.valueOf(i)); 249 boolean write = false; 250 for(int k=1;k<temp.length;k++){ 251 if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){ 252 write = true; 253 } 254 } 255 if(write){ 256 os.write(" <tr>\n"); 257 for(int j=0; j<temp.length;j++){ 258 if(temp[j]!=null){ 259 boolean fallback=false; 260 String str = temp[j]; 261 262 if(temp[j].indexOf(":FALLBACK")>=0){ 263 str = temp[j].substring(0,temp[j].indexOf(":")); 264 fallback=true; 265 // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 266 } 267 String name = UCharacter.getExtendedName(UTF16.charAt(str,0)); 268 if(fallback){ 269 os.write(" <td bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 270 }else if(name.indexOf("unassigned")!=-1){ 271 os.write(" <td bgcolor=#CCCCCC align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 272 }else if(name.indexOf("private")!=-1){ 273 274 275 os.write(" <td bgcolor=#FFBBBB align=center title=\""+name+"\">"+" <br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 276 277 }else{ 278 os.write(" <td align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n"); 279 } 280 }else{ 281 os.write(" <td> </td>\n"); 282 } 283 } 284 os.write(" </tr>\n"); 285 } 286 } 287 os.write(footer); 288 os.close(); 289 }catch( Exception e){ 290 e.printStackTrace(); 291 } 292 } 293 static Hashtable table = new Hashtable(); getKey(int cp)294 static String getKey(int cp){ 295 int delta = cp & 0xFF; 296 delta-= (delta>0x7f)? 0x80 : 0; 297 //delta+=0x0900; 298 return UTF16.valueOf(delta); 299 } 300 writeCharts(Transliterator trans, int start, int index)301 public static void writeCharts(Transliterator trans, int start, int index){ 302 303 Transliterator inverse = trans.getInverse(); 304 for(int i=0;i<=0x7f;i++){ 305 String cp = UTF16.valueOf(start+i); 306 String s1 = inverse.transliterate(cp); 307 String s2 = trans.transliterate(s1); 308 309 String[] arr = (String[])table.get(getKey(start+i)); 310 if(cp.equals(s2)){ 311 arr[index] = s1; 312 }else{ 313 arr[index] = s1 + ":FALLBACK"; 314 } 315 } 316 } 317 writeIICharts(Transliterator trans,int start, int index)318 public static void writeIICharts(Transliterator trans,int start, int index){ 319 320 Transliterator inverse = trans.getInverse(); 321 UnicodeSetIterator iter = new UnicodeSetIterator(inter); 322 323 while(iter.next()){ 324 String cp =UTF16.valueOf(iter.codepoint); 325 String s1 = trans.transliterate(cp); 326 String s2 = inverse.transliterate(s1); 327 String[] arr = (String[])table.get(UTF16.valueOf(iter.codepoint&0xFF)); 328 if(cp.equals(s1)){ 329 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":UNASSIGNED"; 330 }else if(cp.equals(s2)){ 331 arr[index] = s1; 332 }else if(s1.equals(s2)){ 333 if(s1.equals("")){ 334 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED"; 335 }else{ 336 arr[index] = s1+ ":FALLBACK"; 337 } 338 } else{ 339 if(s2.equals("")){ 340 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED"; 341 }else{ 342 arr[index] = s1+ ":FALLBACK"; 343 } 344 } 345 } 346 } writeCharts(Transliterator trans, UnicodeSet target, int index)347 public static void writeCharts(Transliterator trans, UnicodeSet target, int index){ 348 UnicodeSetIterator tIter = new UnicodeSetIterator(target); 349 Transliterator inverse = trans.getInverse(); 350 while(tIter.next()){ 351 String cp = UTF16.valueOf(tIter.codepoint); 352 String s1 = inverse.transliterate(cp); 353 String s2 = trans.transliterate(s1); 354 355 String[] arr = (String[])table.get(getKey(tIter.codepoint)); 356 if(cp.equals(s2)){ 357 arr[index] = cp; 358 }else{ 359 arr[index] = cp + ":FALLBACK"; 360 } 361 } 362 } 363 } 364 365