1 /**************************************************************** 2 * Licensed to the Apache Software Foundation (ASF) under one * 3 * or more contributor license agreements. See the NOTICE file * 4 * distributed with this work for additional information * 5 * regarding copyright ownership. The ASF licenses this file * 6 * to you under the Apache License, Version 2.0 (the * 7 * "License"); you may not use this file except in compliance * 8 * with the License. You may obtain a copy of the License at * 9 * * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, * 13 * software distributed under the License is distributed on an * 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 15 * KIND, either express or implied. See the License for the * 16 * specific language governing permissions and limitations * 17 * under the License. * 18 ****************************************************************/ 19 20 package org.apache.james.mime4j.util; 21 22 import java.io.UnsupportedEncodingException; 23 import java.nio.charset.IllegalCharsetNameException; 24 import java.nio.charset.UnsupportedCharsetException; 25 import java.util.HashMap; 26 import java.util.Locale; 27 import java.util.TreeSet; 28 29 //BEGIN android-changed: Stubbing out logging 30 import org.apache.james.mime4j.Log; 31 import org.apache.james.mime4j.LogFactory; 32 //END android-changed 33 34 /** 35 * Utility class for working with character sets. It is somewhat similar to 36 * the Java 1.4 <code>java.nio.charset.Charset</code> class but knows many 37 * more aliases and is compatible with Java 1.3. It will use a simple detection 38 * mechanism to detect what character sets the current VM supports. This will 39 * be a sub-set of the character sets listed in the 40 * <a href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"> 41 * Java 1.5 (J2SE5.0) Supported Encodings</a> document. 
42 * <p> 43 * The <a href="http://www.iana.org/assignments/character-sets"> 44 * IANA Character Sets</a> document has been used to determine the preferred 45 * MIME character set names and to get a list of known aliases. 46 * <p> 47 * This is a complete list of the character sets known to this class: 48 * <table> 49 * <tr> 50 * <td>Canonical (Java) name</td> 51 * <td>MIME preferred</td> 52 * <td>Aliases</td> 53 * </tr> 54 * <tr> 55 * <td>ASCII</td> 56 * <td>US-ASCII</td> 57 * <td>ANSI_X3.4-1968 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ISO646-US us IBM367 cp367 csASCII ascii7 646 iso_646.irv:1983 </td> 58 * </tr> 59 * <tr> 60 * <td>Big5</td> 61 * <td>Big5</td> 62 * <td>csBig5 CN-Big5 BIG-FIVE BIGFIVE </td> 63 * </tr> 64 * <tr> 65 * <td>Big5_HKSCS</td> 66 * <td>Big5-HKSCS</td> 67 * <td>big5hkscs </td> 68 * </tr> 69 * <tr> 70 * <td>Big5_Solaris</td> 71 * <td>?</td> 72 * <td></td> 73 * </tr> 74 * <tr> 75 * <td>Cp037</td> 76 * <td>IBM037</td> 77 * <td>ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 </td> 78 * </tr> 79 * <tr> 80 * <td>Cp1006</td> 81 * <td>?</td> 82 * <td></td> 83 * </tr> 84 * <tr> 85 * <td>Cp1025</td> 86 * <td>?</td> 87 * <td></td> 88 * </tr> 89 * <tr> 90 * <td>Cp1026</td> 91 * <td>IBM1026</td> 92 * <td>csIBM1026 </td> 93 * </tr> 94 * <tr> 95 * <td>Cp1046</td> 96 * <td>?</td> 97 * <td></td> 98 * </tr> 99 * <tr> 100 * <td>Cp1047</td> 101 * <td>IBM1047</td> 102 * <td>IBM-1047 </td> 103 * </tr> 104 * <tr> 105 * <td>Cp1097</td> 106 * <td>?</td> 107 * <td></td> 108 * </tr> 109 * <tr> 110 * <td>Cp1098</td> 111 * <td>?</td> 112 * <td></td> 113 * </tr> 114 * <tr> 115 * <td>Cp1112</td> 116 * <td>?</td> 117 * <td></td> 118 * </tr> 119 * <tr> 120 * <td>Cp1122</td> 121 * <td>?</td> 122 * <td></td> 123 * </tr> 124 * <tr> 125 * <td>Cp1123</td> 126 * <td>?</td> 127 * <td></td> 128 * </tr> 129 * <tr> 130 * <td>Cp1124</td> 131 * <td>?</td> 132 * <td></td> 133 * </tr> 134 * <tr> 135 * <td>Cp1140</td> 136 * <td>IBM01140</td> 137 * <td>CCSID01140 CP01140 
ebcdic-us-37+euro </td> 138 * </tr> 139 * <tr> 140 * <td>Cp1141</td> 141 * <td>IBM01141</td> 142 * <td>CCSID01141 CP01141 ebcdic-de-273+euro </td> 143 * </tr> 144 * <tr> 145 * <td>Cp1142</td> 146 * <td>IBM01142</td> 147 * <td>CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro </td> 148 * </tr> 149 * <tr> 150 * <td>Cp1143</td> 151 * <td>IBM01143</td> 152 * <td>CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro </td> 153 * </tr> 154 * <tr> 155 * <td>Cp1144</td> 156 * <td>IBM01144</td> 157 * <td>CCSID01144 CP01144 ebcdic-it-280+euro </td> 158 * </tr> 159 * <tr> 160 * <td>Cp1145</td> 161 * <td>IBM01145</td> 162 * <td>CCSID01145 CP01145 ebcdic-es-284+euro </td> 163 * </tr> 164 * <tr> 165 * <td>Cp1146</td> 166 * <td>IBM01146</td> 167 * <td>CCSID01146 CP01146 ebcdic-gb-285+euro </td> 168 * </tr> 169 * <tr> 170 * <td>Cp1147</td> 171 * <td>IBM01147</td> 172 * <td>CCSID01147 CP01147 ebcdic-fr-297+euro </td> 173 * </tr> 174 * <tr> 175 * <td>Cp1148</td> 176 * <td>IBM01148</td> 177 * <td>CCSID01148 CP01148 ebcdic-international-500+euro </td> 178 * </tr> 179 * <tr> 180 * <td>Cp1149</td> 181 * <td>IBM01149</td> 182 * <td>CCSID01149 CP01149 ebcdic-is-871+euro </td> 183 * </tr> 184 * <tr> 185 * <td>Cp1250</td> 186 * <td>windows-1250</td> 187 * <td></td> 188 * </tr> 189 * <tr> 190 * <td>Cp1251</td> 191 * <td>windows-1251</td> 192 * <td></td> 193 * </tr> 194 * <tr> 195 * <td>Cp1252</td> 196 * <td>windows-1252</td> 197 * <td></td> 198 * </tr> 199 * <tr> 200 * <td>Cp1253</td> 201 * <td>windows-1253</td> 202 * <td></td> 203 * </tr> 204 * <tr> 205 * <td>Cp1254</td> 206 * <td>windows-1254</td> 207 * <td></td> 208 * </tr> 209 * <tr> 210 * <td>Cp1255</td> 211 * <td>windows-1255</td> 212 * <td></td> 213 * </tr> 214 * <tr> 215 * <td>Cp1256</td> 216 * <td>windows-1256</td> 217 * <td></td> 218 * </tr> 219 * <tr> 220 * <td>Cp1257</td> 221 * <td>windows-1257</td> 222 * <td></td> 223 * </tr> 224 * <tr> 225 * <td>Cp1258</td> 226 * <td>windows-1258</td> 227 * <td></td> 228 * </tr> 229 * 
<tr> 230 * <td>Cp1381</td> 231 * <td>?</td> 232 * <td></td> 233 * </tr> 234 * <tr> 235 * <td>Cp1383</td> 236 * <td>?</td> 237 * <td></td> 238 * </tr> 239 * <tr> 240 * <td>Cp273</td> 241 * <td>IBM273</td> 242 * <td>csIBM273 </td> 243 * </tr> 244 * <tr> 245 * <td>Cp277</td> 246 * <td>IBM277</td> 247 * <td>EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 </td> 248 * </tr> 249 * <tr> 250 * <td>Cp278</td> 251 * <td>IBM278</td> 252 * <td>CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 </td> 253 * </tr> 254 * <tr> 255 * <td>Cp280</td> 256 * <td>IBM280</td> 257 * <td>ebcdic-cp-it csIBM280 </td> 258 * </tr> 259 * <tr> 260 * <td>Cp284</td> 261 * <td>IBM284</td> 262 * <td>ebcdic-cp-es csIBM284 </td> 263 * </tr> 264 * <tr> 265 * <td>Cp285</td> 266 * <td>IBM285</td> 267 * <td>ebcdic-cp-gb csIBM285 </td> 268 * </tr> 269 * <tr> 270 * <td>Cp297</td> 271 * <td>IBM297</td> 272 * <td>ebcdic-cp-fr csIBM297 </td> 273 * </tr> 274 * <tr> 275 * <td>Cp33722</td> 276 * <td>?</td> 277 * <td></td> 278 * </tr> 279 * <tr> 280 * <td>Cp420</td> 281 * <td>IBM420</td> 282 * <td>ebcdic-cp-ar1 csIBM420 </td> 283 * </tr> 284 * <tr> 285 * <td>Cp424</td> 286 * <td>IBM424</td> 287 * <td>ebcdic-cp-he csIBM424 </td> 288 * </tr> 289 * <tr> 290 * <td>Cp437</td> 291 * <td>IBM437</td> 292 * <td>437 csPC8CodePage437 </td> 293 * </tr> 294 * <tr> 295 * <td>Cp500</td> 296 * <td>IBM500</td> 297 * <td>ebcdic-cp-be ebcdic-cp-ch csIBM500 </td> 298 * </tr> 299 * <tr> 300 * <td>Cp737</td> 301 * <td>?</td> 302 * <td></td> 303 * </tr> 304 * <tr> 305 * <td>Cp775</td> 306 * <td>IBM775</td> 307 * <td>csPC775Baltic </td> 308 * </tr> 309 * <tr> 310 * <td>Cp838</td> 311 * <td>IBM-Thai</td> 312 * <td></td> 313 * </tr> 314 * <tr> 315 * <td>Cp850</td> 316 * <td>IBM850</td> 317 * <td>850 csPC850Multilingual </td> 318 * </tr> 319 * <tr> 320 * <td>Cp852</td> 321 * <td>IBM852</td> 322 * <td>852 csPCp852 </td> 323 * </tr> 324 * <tr> 325 * <td>Cp855</td> 326 * <td>IBM855</td> 327 * <td>855 csIBM855 </td> 328 * </tr> 329 * <tr> 330 * <td>Cp856</td> 331 * 
<td>?</td> 332 * <td></td> 333 * </tr> 334 * <tr> 335 * <td>Cp857</td> 336 * <td>IBM857</td> 337 * <td>857 csIBM857 </td> 338 * </tr> 339 * <tr> 340 * <td>Cp858</td> 341 * <td>IBM00858</td> 342 * <td>CCSID00858 CP00858 PC-Multilingual-850+euro </td> 343 * </tr> 344 * <tr> 345 * <td>Cp860</td> 346 * <td>IBM860</td> 347 * <td>860 csIBM860 </td> 348 * </tr> 349 * <tr> 350 * <td>Cp861</td> 351 * <td>IBM861</td> 352 * <td>861 cp-is csIBM861 </td> 353 * </tr> 354 * <tr> 355 * <td>Cp862</td> 356 * <td>IBM862</td> 357 * <td>862 csPC862LatinHebrew </td> 358 * </tr> 359 * <tr> 360 * <td>Cp863</td> 361 * <td>IBM863</td> 362 * <td>863 csIBM863 </td> 363 * </tr> 364 * <tr> 365 * <td>Cp864</td> 366 * <td>IBM864</td> 367 * <td>cp864 csIBM864 </td> 368 * </tr> 369 * <tr> 370 * <td>Cp865</td> 371 * <td>IBM865</td> 372 * <td>865 csIBM865 </td> 373 * </tr> 374 * <tr> 375 * <td>Cp866</td> 376 * <td>IBM866</td> 377 * <td>866 csIBM866 </td> 378 * </tr> 379 * <tr> 380 * <td>Cp868</td> 381 * <td>IBM868</td> 382 * <td>cp-ar csIBM868 </td> 383 * </tr> 384 * <tr> 385 * <td>Cp869</td> 386 * <td>IBM869</td> 387 * <td>cp-gr csIBM869 </td> 388 * </tr> 389 * <tr> 390 * <td>Cp870</td> 391 * <td>IBM870</td> 392 * <td>ebcdic-cp-roece ebcdic-cp-yu csIBM870 </td> 393 * </tr> 394 * <tr> 395 * <td>Cp871</td> 396 * <td>IBM871</td> 397 * <td>ebcdic-cp-is csIBM871 </td> 398 * </tr> 399 * <tr> 400 * <td>Cp875</td> 401 * <td>?</td> 402 * <td></td> 403 * </tr> 404 * <tr> 405 * <td>Cp918</td> 406 * <td>IBM918</td> 407 * <td>ebcdic-cp-ar2 csIBM918 </td> 408 * </tr> 409 * <tr> 410 * <td>Cp921</td> 411 * <td>?</td> 412 * <td></td> 413 * </tr> 414 * <tr> 415 * <td>Cp922</td> 416 * <td>?</td> 417 * <td></td> 418 * </tr> 419 * <tr> 420 * <td>Cp930</td> 421 * <td>?</td> 422 * <td></td> 423 * </tr> 424 * <tr> 425 * <td>Cp933</td> 426 * <td>?</td> 427 * <td></td> 428 * </tr> 429 * <tr> 430 * <td>Cp935</td> 431 * <td>?</td> 432 * <td></td> 433 * </tr> 434 * <tr> 435 * <td>Cp937</td> 436 * <td>?</td> 437 * <td></td> 438 
* </tr> 439 * <tr> 440 * <td>Cp939</td> 441 * <td>?</td> 442 * <td></td> 443 * </tr> 444 * <tr> 445 * <td>Cp942</td> 446 * <td>?</td> 447 * <td></td> 448 * </tr> 449 * <tr> 450 * <td>Cp942C</td> 451 * <td>?</td> 452 * <td></td> 453 * </tr> 454 * <tr> 455 * <td>Cp943</td> 456 * <td>?</td> 457 * <td></td> 458 * </tr> 459 * <tr> 460 * <td>Cp943C</td> 461 * <td>?</td> 462 * <td></td> 463 * </tr> 464 * <tr> 465 * <td>Cp948</td> 466 * <td>?</td> 467 * <td></td> 468 * </tr> 469 * <tr> 470 * <td>Cp949</td> 471 * <td>?</td> 472 * <td></td> 473 * </tr> 474 * <tr> 475 * <td>Cp949C</td> 476 * <td>?</td> 477 * <td></td> 478 * </tr> 479 * <tr> 480 * <td>Cp950</td> 481 * <td>?</td> 482 * <td></td> 483 * </tr> 484 * <tr> 485 * <td>Cp964</td> 486 * <td>?</td> 487 * <td></td> 488 * </tr> 489 * <tr> 490 * <td>Cp970</td> 491 * <td>?</td> 492 * <td></td> 493 * </tr> 494 * <tr> 495 * <td>EUC_CN</td> 496 * <td>GB2312</td> 497 * <td>x-EUC-CN csGB2312 euccn euc-cn gb2312-80 gb2312-1980 CN-GB CN-GB-ISOIR165 </td> 498 * </tr> 499 * <tr> 500 * <td>EUC_JP</td> 501 * <td>EUC-JP</td> 502 * <td>csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese eucjis x-eucjp eucjp x-euc-jp </td> 503 * </tr> 504 * <tr> 505 * <td>EUC_JP_LINUX</td> 506 * <td>?</td> 507 * <td></td> 508 * </tr> 509 * <tr> 510 * <td>EUC_JP_Solaris</td> 511 * <td>?</td> 512 * <td></td> 513 * </tr> 514 * <tr> 515 * <td>EUC_KR</td> 516 * <td>EUC-KR</td> 517 * <td>csEUCKR ksc5601 5601 ksc5601_1987 ksc_5601 ksc5601-1987 ks_c_5601-1987 euckr </td> 518 * </tr> 519 * <tr> 520 * <td>EUC_TW</td> 521 * <td>EUC-TW</td> 522 * <td>x-EUC-TW cns11643 euctw </td> 523 * </tr> 524 * <tr> 525 * <td>GB18030</td> 526 * <td>GB18030</td> 527 * <td>gb18030-2000 </td> 528 * </tr> 529 * <tr> 530 * <td>GBK</td> 531 * <td>windows-936</td> 532 * <td>CP936 MS936 ms_936 x-mswin-936 </td> 533 * </tr> 534 * <tr> 535 * <td>ISCII91</td> 536 * <td>?</td> 537 * <td>x-ISCII91 iscii </td> 538 * </tr> 539 * <tr> 540 * <td>ISO2022CN</td> 541 * 
<td>ISO-2022-CN</td> 542 * <td></td> 543 * </tr> 544 * <tr> 545 * <td>ISO2022JP</td> 546 * <td>ISO-2022-JP</td> 547 * <td>csISO2022JP JIS jis_encoding csjisencoding </td> 548 * </tr> 549 * <tr> 550 * <td>ISO2022KR</td> 551 * <td>ISO-2022-KR</td> 552 * <td>csISO2022KR </td> 553 * </tr> 554 * <tr> 555 * <td>ISO2022_CN_CNS</td> 556 * <td>?</td> 557 * <td></td> 558 * </tr> 559 * <tr> 560 * <td>ISO2022_CN_GB</td> 561 * <td>?</td> 562 * <td></td> 563 * </tr> 564 * <tr> 565 * <td>ISO8859_1</td> 566 * <td>ISO-8859-1</td> 567 * <td>ISO_8859-1:1987 iso-ir-100 ISO_8859-1 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 819 IBM-819 ISO8859-1 ISO_8859_1 </td> 568 * </tr> 569 * <tr> 570 * <td>ISO8859_13</td> 571 * <td>ISO-8859-13</td> 572 * <td></td> 573 * </tr> 574 * <tr> 575 * <td>ISO8859_15</td> 576 * <td>ISO-8859-15</td> 577 * <td>ISO_8859-15 Latin-9 8859_15 csISOlatin9 IBM923 cp923 923 L9 IBM-923 ISO8859-15 LATIN9 LATIN0 csISOlatin0 ISO8859_15_FDIS </td> 578 * </tr> 579 * <tr> 580 * <td>ISO8859_2</td> 581 * <td>ISO-8859-2</td> 582 * <td>ISO_8859-2:1987 iso-ir-101 ISO_8859-2 latin2 l2 csISOLatin2 8859_2 iso8859_2 </td> 583 * </tr> 584 * <tr> 585 * <td>ISO8859_3</td> 586 * <td>ISO-8859-3</td> 587 * <td>ISO_8859-3:1988 iso-ir-109 ISO_8859-3 latin3 l3 csISOLatin3 8859_3 </td> 588 * </tr> 589 * <tr> 590 * <td>ISO8859_4</td> 591 * <td>ISO-8859-4</td> 592 * <td>ISO_8859-4:1988 iso-ir-110 ISO_8859-4 latin4 l4 csISOLatin4 8859_4 </td> 593 * </tr> 594 * <tr> 595 * <td>ISO8859_5</td> 596 * <td>ISO-8859-5</td> 597 * <td>ISO_8859-5:1988 iso-ir-144 ISO_8859-5 cyrillic csISOLatinCyrillic 8859_5 </td> 598 * </tr> 599 * <tr> 600 * <td>ISO8859_6</td> 601 * <td>ISO-8859-6</td> 602 * <td>ISO_8859-6:1987 iso-ir-127 ISO_8859-6 ECMA-114 ASMO-708 arabic csISOLatinArabic 8859_6 </td> 603 * </tr> 604 * <tr> 605 * <td>ISO8859_7</td> 606 * <td>ISO-8859-7</td> 607 * <td>ISO_8859-7:1987 iso-ir-126 ISO_8859-7 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 sun_eu_greek </td> 608 * </tr> 609 * <tr> 610 * 
<td>ISO8859_8</td> 611 * <td>ISO-8859-8</td> 612 * <td>ISO_8859-8:1988 iso-ir-138 ISO_8859-8 hebrew csISOLatinHebrew 8859_8 </td> 613 * </tr> 614 * <tr> 615 * <td>ISO8859_9</td> 616 * <td>ISO-8859-9</td> 617 * <td>ISO_8859-9:1989 iso-ir-148 ISO_8859-9 latin5 l5 csISOLatin5 8859_9 </td> 618 * </tr> 619 * <tr> 620 * <td>JISAutoDetect</td> 621 * <td>?</td> 622 * <td></td> 623 * </tr> 624 * <tr> 625 * <td>JIS_C6626-1983</td> 626 * <td>JIS_C6626-1983</td> 627 * <td>x-JIS0208 JIS0208 csISO87JISX0208 x0208 JIS_X0208-1983 iso-ir-87 </td> 628 * </tr> 629 * <tr> 630 * <td>JIS_X0201</td> 631 * <td>JIS_X0201</td> 632 * <td>X0201 JIS0201 csHalfWidthKatakana </td> 633 * </tr> 634 * <tr> 635 * <td>JIS_X0212-1990</td> 636 * <td>JIS_X0212-1990</td> 637 * <td>iso-ir-159 x0212 JIS0212 csISO159JISX02121990 </td> 638 * </tr> 639 * <tr> 640 * <td>KOI8_R</td> 641 * <td>KOI8-R</td> 642 * <td>csKOI8R koi8 </td> 643 * </tr> 644 * <tr> 645 * <td>MS874</td> 646 * <td>windows-874</td> 647 * <td>cp874 </td> 648 * </tr> 649 * <tr> 650 * <td>MS932</td> 651 * <td>Windows-31J</td> 652 * <td>windows-932 csWindows31J x-ms-cp932 </td> 653 * </tr> 654 * <tr> 655 * <td>MS949</td> 656 * <td>windows-949</td> 657 * <td>windows949 ms_949 x-windows-949 </td> 658 * </tr> 659 * <tr> 660 * <td>MS950</td> 661 * <td>windows-950</td> 662 * <td>x-windows-950 </td> 663 * </tr> 664 * <tr> 665 * <td>MS950_HKSCS</td> 666 * <td></td> 667 * <td></td> 668 * </tr> 669 * <tr> 670 * <td>MacArabic</td> 671 * <td>?</td> 672 * <td></td> 673 * </tr> 674 * <tr> 675 * <td>MacCentralEurope</td> 676 * <td>?</td> 677 * <td></td> 678 * </tr> 679 * <tr> 680 * <td>MacCroatian</td> 681 * <td>?</td> 682 * <td></td> 683 * </tr> 684 * <tr> 685 * <td>MacCyrillic</td> 686 * <td>?</td> 687 * <td></td> 688 * </tr> 689 * <tr> 690 * <td>MacDingbat</td> 691 * <td>?</td> 692 * <td></td> 693 * </tr> 694 * <tr> 695 * <td>MacGreek</td> 696 * <td>MacGreek</td> 697 * <td></td> 698 * </tr> 699 * <tr> 700 * <td>MacHebrew</td> 701 * <td>?</td> 702 * 
<td></td> 703 * </tr> 704 * <tr> 705 * <td>MacIceland</td> 706 * <td>?</td> 707 * <td></td> 708 * </tr> 709 * <tr> 710 * <td>MacRoman</td> 711 * <td>MacRoman</td> 712 * <td>Macintosh MAC csMacintosh </td> 713 * </tr> 714 * <tr> 715 * <td>MacRomania</td> 716 * <td>?</td> 717 * <td></td> 718 * </tr> 719 * <tr> 720 * <td>MacSymbol</td> 721 * <td>?</td> 722 * <td></td> 723 * </tr> 724 * <tr> 725 * <td>MacThai</td> 726 * <td>?</td> 727 * <td></td> 728 * </tr> 729 * <tr> 730 * <td>MacTurkish</td> 731 * <td>?</td> 732 * <td></td> 733 * </tr> 734 * <tr> 735 * <td>MacUkraine</td> 736 * <td>?</td> 737 * <td></td> 738 * </tr> 739 * <tr> 740 * <td>SJIS</td> 741 * <td>Shift_JIS</td> 742 * <td>MS_Kanji csShiftJIS shift-jis x-sjis pck </td> 743 * </tr> 744 * <tr> 745 * <td>TIS620</td> 746 * <td>TIS-620</td> 747 * <td></td> 748 * </tr> 749 * <tr> 750 * <td>UTF-16</td> 751 * <td>UTF-16</td> 752 * <td>UTF_16 </td> 753 * </tr> 754 * <tr> 755 * <td>UTF8</td> 756 * <td>UTF-8</td> 757 * <td></td> 758 * </tr> 759 * <tr> 760 * <td>UnicodeBig</td> 761 * <td>?</td> 762 * <td></td> 763 * </tr> 764 * <tr> 765 * <td>UnicodeBigUnmarked</td> 766 * <td>UTF-16BE</td> 767 * <td>X-UTF-16BE UTF_16BE ISO-10646-UCS-2 </td> 768 * </tr> 769 * <tr> 770 * <td>UnicodeLittle</td> 771 * <td>?</td> 772 * <td></td> 773 * </tr> 774 * <tr> 775 * <td>UnicodeLittleUnmarked</td> 776 * <td>UTF-16LE</td> 777 * <td>UTF_16LE X-UTF-16LE </td> 778 * </tr> 779 * <tr> 780 * <td>x-Johab</td> 781 * <td>johab</td> 782 * <td>johab cp1361 ms1361 ksc5601-1992 ksc5601_1992 </td> 783 * </tr> 784 * <tr> 785 * <td>x-iso-8859-11</td> 786 * <td>?</td> 787 * <td></td> 788 * </tr> 789 * </table> 790 * 791 * 792 * @version $Id: CharsetUtil.java,v 1.1 2004/10/25 07:26:46 ntherning Exp $ 793 */ 794 public class CharsetUtil { 795 private static Log log = LogFactory.getLog(CharsetUtil.class); 796 797 private static class Charset implements Comparable<Charset> { 798 private String canonical = null; 799 private String mime = null; 800 private 
String[] aliases = null; 801 Charset(String canonical, String mime, String[] aliases)802 private Charset(String canonical, String mime, String[] aliases) { 803 this.canonical = canonical; 804 this.mime = mime; 805 this.aliases = aliases; 806 } 807 compareTo(Charset c)808 public int compareTo(Charset c) { 809 return this.canonical.compareTo(c.canonical); 810 } 811 } 812 813 private static Charset[] JAVA_CHARSETS = { 814 new Charset("ISO8859_1", "ISO-8859-1", 815 new String[] {"ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1", 816 "latin1", "l1", "IBM819", "CP819", 817 "csISOLatin1", "8859_1", "819", "IBM-819", 818 "ISO8859-1", "ISO_8859_1"}), 819 new Charset("ISO8859_2", "ISO-8859-2", 820 new String[] {"ISO_8859-2:1987", "iso-ir-101", "ISO_8859-2", 821 "latin2", "l2", "csISOLatin2", "8859_2", 822 "iso8859_2"}), 823 new Charset("ISO8859_3", "ISO-8859-3", new String[] {"ISO_8859-3:1988", "iso-ir-109", "ISO_8859-3", "latin3", "l3", "csISOLatin3", "8859_3"}), 824 new Charset("ISO8859_4", "ISO-8859-4", 825 new String[] {"ISO_8859-4:1988", "iso-ir-110", "ISO_8859-4", 826 "latin4", "l4", "csISOLatin4", "8859_4"}), 827 new Charset("ISO8859_5", "ISO-8859-5", 828 new String[] {"ISO_8859-5:1988", "iso-ir-144", "ISO_8859-5", 829 "cyrillic", "csISOLatinCyrillic", "8859_5"}), 830 new Charset("ISO8859_6", "ISO-8859-6", new String[] {"ISO_8859-6:1987", "iso-ir-127", "ISO_8859-6", "ECMA-114", "ASMO-708", "arabic", "csISOLatinArabic", "8859_6"}), 831 new Charset("ISO8859_7", "ISO-8859-7", 832 new String[] {"ISO_8859-7:1987", "iso-ir-126", "ISO_8859-7", 833 "ELOT_928", "ECMA-118", "greek", "greek8", 834 "csISOLatinGreek", "8859_7", "sun_eu_greek"}), 835 new Charset("ISO8859_8", "ISO-8859-8", new String[] {"ISO_8859-8:1988", "iso-ir-138", "ISO_8859-8", "hebrew", "csISOLatinHebrew", "8859_8"}), 836 new Charset("ISO8859_9", "ISO-8859-9", 837 new String[] {"ISO_8859-9:1989", "iso-ir-148", "ISO_8859-9", 838 "latin5", "l5", "csISOLatin5", "8859_9"}), 839 840 new Charset("ISO8859_13", 
"ISO-8859-13", new String[] {}), 841 new Charset("ISO8859_15", "ISO-8859-15", 842 new String[] {"ISO_8859-15", "Latin-9", "8859_15", 843 "csISOlatin9", "IBM923", "cp923", "923", "L9", 844 "IBM-923", "ISO8859-15", "LATIN9", "LATIN0", 845 "csISOlatin0", "ISO8859_15_FDIS"}), 846 new Charset("KOI8_R", "KOI8-R", new String[] {"csKOI8R", "koi8"}), 847 new Charset("ASCII", "US-ASCII", 848 new String[] {"ANSI_X3.4-1968", "iso-ir-6", 849 "ANSI_X3.4-1986", "ISO_646.irv:1991", 850 "ISO646-US", "us", "IBM367", "cp367", 851 "csASCII", "ascii7", "646", "iso_646.irv:1983"}), 852 new Charset("UTF8", "UTF-8", new String[] {}), 853 new Charset("UTF-16", "UTF-16", new String[] {"UTF_16"}), 854 new Charset("UnicodeBigUnmarked", "UTF-16BE", new String[] {"X-UTF-16BE", "UTF_16BE", "ISO-10646-UCS-2"}), 855 new Charset("UnicodeLittleUnmarked", "UTF-16LE", new String[] {"UTF_16LE", "X-UTF-16LE"}), 856 new Charset("Big5", "Big5", new String[] {"csBig5", "CN-Big5", "BIG-FIVE", "BIGFIVE"}), 857 new Charset("Big5_HKSCS", "Big5-HKSCS", new String[] {"big5hkscs"}), 858 new Charset("EUC_JP", "EUC-JP", 859 new String[] {"csEUCPkdFmtJapanese", 860 "Extended_UNIX_Code_Packed_Format_for_Japanese", 861 "eucjis", "x-eucjp", "eucjp", "x-euc-jp"}), 862 new Charset("EUC_KR", "EUC-KR", 863 new String[] {"csEUCKR", "ksc5601", "5601", "ksc5601_1987", 864 "ksc_5601", "ksc5601-1987", "ks_c_5601-1987", 865 "euckr"}), 866 new Charset("GB18030", "GB18030", new String[] {"gb18030-2000"}), 867 new Charset("EUC_CN", "GB2312", new String[] {"x-EUC-CN", "csGB2312", "euccn", "euc-cn", "gb2312-80", "gb2312-1980", "CN-GB", "CN-GB-ISOIR165"}), 868 new Charset("GBK", "windows-936", new String[] {"CP936", "MS936", "ms_936", "x-mswin-936"}), 869 870 new Charset("Cp037", "IBM037", new String[] {"ebcdic-cp-us", "ebcdic-cp-ca", "ebcdic-cp-wt", "ebcdic-cp-nl", "csIBM037"}), 871 new Charset("Cp273", "IBM273", new String[] {"csIBM273"}), 872 new Charset("Cp277", "IBM277", new String[] {"EBCDIC-CP-DK", "EBCDIC-CP-NO", "csIBM277"}), 
new Charset("Cp278", "IBM278",
                new String[] {"CP278", "ebcdic-cp-fi", "ebcdic-cp-se", "csIBM278"}),
        new Charset("Cp280", "IBM280", new String[] {"ebcdic-cp-it", "csIBM280"}),
        new Charset("Cp284", "IBM284", new String[] {"ebcdic-cp-es", "csIBM284"}),
        new Charset("Cp285", "IBM285", new String[] {"ebcdic-cp-gb", "csIBM285"}),
        new Charset("Cp297", "IBM297", new String[] {"ebcdic-cp-fr", "csIBM297"}),
        new Charset("Cp420", "IBM420", new String[] {"ebcdic-cp-ar1", "csIBM420"}),
        new Charset("Cp424", "IBM424", new String[] {"ebcdic-cp-he", "csIBM424"}),
        new Charset("Cp437", "IBM437", new String[] {"437", "csPC8CodePage437"}),
        new Charset("Cp500", "IBM500",
                new String[] {"ebcdic-cp-be", "ebcdic-cp-ch", "csIBM500"}),
        new Charset("Cp775", "IBM775", new String[] {"csPC775Baltic"}),
        new Charset("Cp838", "IBM-Thai", new String[] {}),
        new Charset("Cp850", "IBM850", new String[] {"850", "csPC850Multilingual"}),
        new Charset("Cp852", "IBM852", new String[] {"852", "csPCp852"}),
        new Charset("Cp855", "IBM855", new String[] {"855", "csIBM855"}),
        new Charset("Cp857", "IBM857", new String[] {"857", "csIBM857"}),
        new Charset("Cp858", "IBM00858",
                new String[] {"CCSID00858", "CP00858", "PC-Multilingual-850+euro"}),
        new Charset("Cp860", "IBM860", new String[] {"860", "csIBM860"}),
        new Charset("Cp861", "IBM861", new String[] {"861", "cp-is", "csIBM861"}),
        new Charset("Cp862", "IBM862", new String[] {"862", "csPC862LatinHebrew"}),
        new Charset("Cp863", "IBM863", new String[] {"863", "csIBM863"}),
        new Charset("Cp864", "IBM864", new String[] {"cp864", "csIBM864"}),
        new Charset("Cp865", "IBM865", new String[] {"865", "csIBM865"}),
        new Charset("Cp866", "IBM866", new String[] {"866", "csIBM866"}),
        new Charset("Cp868", "IBM868", new String[] {"cp-ar", "csIBM868"}),
        new Charset("Cp869", "IBM869", new String[] {"cp-gr", "csIBM869"}),
        new Charset("Cp870", "IBM870",
                new String[] {"ebcdic-cp-roece", "ebcdic-cp-yu", "csIBM870"}),
        new Charset("Cp871", "IBM871", new String[] {"ebcdic-cp-is", "csIBM871"}),
        new Charset("Cp918", "IBM918", new String[] {"ebcdic-cp-ar2", "csIBM918"}),
        new Charset("Cp1026", "IBM1026", new String[] {"csIBM1026"}),
        new Charset("Cp1047", "IBM1047", new String[] {"IBM-1047"}),
        // Euro-enabled EBCDIC code pages
        new Charset("Cp1140", "IBM01140",
                new String[] {"CCSID01140", "CP01140", "ebcdic-us-37+euro"}),
        new Charset("Cp1141", "IBM01141",
                new String[] {"CCSID01141", "CP01141", "ebcdic-de-273+euro"}),
        new Charset("Cp1142", "IBM01142",
                new String[] {"CCSID01142", "CP01142", "ebcdic-dk-277+euro",
                              "ebcdic-no-277+euro"}),
        new Charset("Cp1143", "IBM01143",
                new String[] {"CCSID01143", "CP01143", "ebcdic-fi-278+euro",
                              "ebcdic-se-278+euro"}),
        new Charset("Cp1144", "IBM01144",
                new String[] {"CCSID01144", "CP01144", "ebcdic-it-280+euro"}),
        new Charset("Cp1145", "IBM01145",
                new String[] {"CCSID01145", "CP01145", "ebcdic-es-284+euro"}),
        new Charset("Cp1146", "IBM01146",
                new String[] {"CCSID01146", "CP01146", "ebcdic-gb-285+euro"}),
        new Charset("Cp1147", "IBM01147",
                new String[] {"CCSID01147", "CP01147", "ebcdic-fr-297+euro"}),
        new Charset("Cp1148", "IBM01148",
                new String[] {"CCSID01148", "CP01148", "ebcdic-international-500+euro"}),
        new Charset("Cp1149", "IBM01149",
                new String[] {"CCSID01149", "CP01149", "ebcdic-is-871+euro"}),
        // Windows code pages
        new Charset("Cp1250", "windows-1250", new String[] {}),
        new Charset("Cp1251", "windows-1251", new String[] {}),
        new Charset("Cp1252", "windows-1252", new String[] {}),
        new Charset("Cp1253", "windows-1253", new String[] {}),
        new Charset("Cp1254", "windows-1254", new String[] {}),
        new Charset("Cp1255", "windows-1255", new String[] {}),
        new Charset("Cp1256", "windows-1256", new String[] {}),
        new Charset("Cp1257", "windows-1257", new String[] {}),
        new Charset("Cp1258", "windows-1258", new String[] {}),
        // ISO 2022 and JIS
        new Charset("ISO2022CN", "ISO-2022-CN", new String[] {}),
        new Charset("ISO2022JP", "ISO-2022-JP",
                new String[] {"csISO2022JP", "JIS", "jis_encoding", "csjisencoding"}),
        new Charset("ISO2022KR", "ISO-2022-KR", new String[] {"csISO2022KR"}),
        new Charset("JIS_X0201", "JIS_X0201",
                new String[] {"X0201", "JIS0201", "csHalfWidthKatakana"}),
        new Charset("JIS_X0212-1990", "JIS_X0212-1990",
                new String[] {"iso-ir-159", "x0212", "JIS0212", "csISO159JISX02121990"}),
        new Charset("JIS_C6626-1983", "JIS_C6626-1983",
                new String[] {"x-JIS0208", "JIS0208", "csISO87JISX0208", "x0208",
                              "JIS_X0208-1983", "iso-ir-87"}),
        new Charset("SJIS", "Shift_JIS",
                new String[] {"MS_Kanji", "csShiftJIS", "shift-jis", "x-sjis", "pck"}),
        new Charset("TIS620", "TIS-620", new String[] {}),
        new Charset("MS932", "Windows-31J",
                new String[] {"windows-932", "csWindows31J", "x-ms-cp932"}),
        new Charset("EUC_TW", "EUC-TW", new String[] {"x-EUC-TW", "cns11643", "euctw"}),
        new Charset("x-Johab", "johab",
                new String[] {"johab", "cp1361", "ms1361", "ksc5601-1992", "ksc5601_1992"}),
        // NOTE(review): this row carries an empty MIME name where the rows
        // further down pass null - confirm which one is intended.
        new Charset("MS950_HKSCS", "", new String[] {}),
        new Charset("MS874", "windows-874", new String[] {"cp874"}),
        new Charset("MS949", "windows-949",
                new String[] {"windows949", "ms_949", "x-windows-949"}),
        new Charset("MS950", "windows-950", new String[] {"x-windows-950"}),

        // Rows below pass null as the MIME name unless one is given explicitly.
        new Charset("Cp737", null, new String[] {}),
        new Charset("Cp856", null, new String[] {}),
        new Charset("Cp875", null, new String[] {}),
        new Charset("Cp921", null, new String[] {}),
        new Charset("Cp922", null, new String[] {}),
        new Charset("Cp930", null, new String[] {}),
        new Charset("Cp933", null, new String[] {}),
        new Charset("Cp935", null, new String[] {}),
        new Charset("Cp937", null, new String[] {}),
        new Charset("Cp939", null, new String[] {}),
        new Charset("Cp942", null, new String[] {}),
        new Charset("Cp942C", null, new String[] {}),
        new Charset("Cp943", null, new String[] {}),
        new Charset("Cp943C", null, new String[] {}),
        new Charset("Cp948", null, new String[] {}),
        new Charset("Cp949", null, new String[] {}),
        new Charset("Cp949C", null, new String[] {}),
        new Charset("Cp950", null, new String[] {}),
        new Charset("Cp964", null, new String[] {}),
        new Charset("Cp970", null, new String[] {}),
        new Charset("Cp1006", null, new String[] {}),
        new Charset("Cp1025", null, new String[] {}),
        new Charset("Cp1046", null, new String[] {}),
        new Charset("Cp1097", null, new String[] {}),
        new Charset("Cp1098", null, new String[] {}),
        new Charset("Cp1112", null, new String[] {}),
        new Charset("Cp1122", null, new String[] {}),
        new Charset("Cp1123", null, new String[] {}),
        new Charset("Cp1124", null, new String[] {}),
        new Charset("Cp1381", null, new String[] {}),
        new Charset("Cp1383", null, new String[] {}),
        new Charset("Cp33722", null, new String[] {}),
        new Charset("Big5_Solaris", null, new String[] {}),
        new Charset("EUC_JP_LINUX", null, new String[] {}),
        new Charset("EUC_JP_Solaris", null, new String[] {}),
        new Charset("ISCII91", null, new String[] {"x-ISCII91", "iscii"}),
        new Charset("ISO2022_CN_CNS", null, new String[] {}),
        new Charset("ISO2022_CN_GB", null, new String[] {}),
        new Charset("x-iso-8859-11", null, new String[] {}),
        new Charset("JISAutoDetect", null, new String[] {}),
        // Macintosh encodings
        new Charset("MacArabic", null, new String[] {}),
        new Charset("MacCentralEurope", null, new String[] {}),
        new Charset("MacCroatian", null, new String[] {}),
        new Charset("MacCyrillic", null, new String[] {}),
        new Charset("MacDingbat", null, new String[] {}),
        new Charset("MacGreek", "MacGreek", new String[] {}),
        new Charset("MacHebrew", null, new String[] {}),
        new Charset("MacIceland", null, new String[] {}),
        new Charset("MacRoman", "MacRoman",
                new String[] {"Macintosh", "MAC", "csMacintosh"}),
        new Charset("MacRomania", null, new String[] {}),
994 new Charset("MacSymbol", null, new String[] {}), 995 new Charset("MacThai", null, new String[] {}), 996 new Charset("MacTurkish", null, new String[] {}), 997 new Charset("MacUkraine", null, new String[] {}), 998 new Charset("UnicodeBig", null, new String[] {}), 999 new Charset("UnicodeLittle", null, new String[] {}) 1000 }; 1001 1002 /** 1003 * Contains the canonical names of character sets which can be used to 1004 * decode bytes into Java chars. 1005 */ 1006 private static TreeSet<String> decodingSupported = null; 1007 1008 /** 1009 * Contains the canonical names of character sets which can be used to 1010 * encode Java chars into bytes. 1011 */ 1012 private static TreeSet<String> encodingSupported = null; 1013 1014 /** 1015 * Maps character set names to Charset objects. All possible names of 1016 * a charset will be mapped to the Charset. 1017 */ 1018 private static HashMap<String, Charset> charsetMap = null; 1019 1020 static { 1021 decodingSupported = new TreeSet<String>(); 1022 encodingSupported = new TreeSet<String>(); 1023 byte[] dummy = new byte[] {'d', 'u', 'm', 'm', 'y'}; 1024 for (int i = 0; i < JAVA_CHARSETS.length; i++) { 1025 try { 1026 String s = new String(dummy, JAVA_CHARSETS[i].canonical); decodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US))1027 decodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US)); 1028 } catch (UnsupportedOperationException e) { 1029 } catch (UnsupportedEncodingException e) { 1030 } 1031 try { getBytes(JAVA_CHARSETS[i].canonical)1032 "dummy".getBytes(JAVA_CHARSETS[i].canonical); encodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US))1033 encodingSupported.add(JAVA_CHARSETS[i].canonical.toLowerCase(Locale.US)); 1034 } catch (UnsupportedOperationException e) { 1035 } catch (UnsupportedEncodingException e) { 1036 } 1037 } 1038 1039 charsetMap = new HashMap<String, Charset>(); 1040 for (int i = 0; i < JAVA_CHARSETS.length; i++) { 1041 Charset c = JAVA_CHARSETS[i]; 
c.canonical.toLowerCase(Locale.US)1042 charsetMap.put(c.canonical.toLowerCase(Locale.US), c); 1043 if (c.mime != null) { c.mime.toLowerCase(Locale.US)1044 charsetMap.put(c.mime.toLowerCase(Locale.US), c); 1045 } 1046 if (c.aliases != null) { 1047 for (int j = 0; j < c.aliases.length; j++) { charsetMap.put(c.aliases[j].toLowerCase(Locale.US), c)1048 charsetMap.put(c.aliases[j].toLowerCase(Locale.US), c); 1049 } 1050 } 1051 } 1052 1053 if (log.isDebugEnabled()) { 1054 log.debug("Character sets which support decoding: " 1055 + decodingSupported); 1056 log.debug("Character sets which support encoding: " 1057 + encodingSupported); 1058 } 1059 } 1060 1061 /** 1062 * ANDROID: THE FOLLOWING SET OF STATIC STRINGS ARE COPIED FROM A NEWER VERSION OF MIME4J 1063 */ 1064 1065 /** carriage return - line feed sequence */ 1066 public static final String CRLF = "\r\n"; 1067 1068 /** US-ASCII CR, carriage return (13) */ 1069 public static final int CR = '\r'; 1070 1071 /** US-ASCII LF, line feed (10) */ 1072 public static final int LF = '\n'; 1073 1074 /** US-ASCII SP, space (32) */ 1075 public static final int SP = ' '; 1076 1077 /** US-ASCII HT, horizontal-tab (9)*/ 1078 public static final int HT = '\t'; 1079 1080 public static final java.nio.charset.Charset US_ASCII = java.nio.charset.Charset 1081 .forName("US-ASCII"); 1082 1083 public static final java.nio.charset.Charset ISO_8859_1 = java.nio.charset.Charset 1084 .forName("ISO-8859-1"); 1085 1086 public static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset 1087 .forName("UTF-8"); 1088 1089 /** 1090 * Returns <code>true</code> if the specified character is a whitespace 1091 * character (CR, LF, SP or HT). 1092 * 1093 * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J 1094 * 1095 * @param ch 1096 * character to test. 1097 * @return <code>true</code> if the specified character is a whitespace 1098 * character, <code>false</code> otherwise. 
1099 */ isWhitespace(char ch)1100 public static boolean isWhitespace(char ch) { 1101 return ch == SP || ch == HT || ch == CR || ch == LF; 1102 } 1103 1104 /** 1105 * Returns <code>true</code> if the specified string consists entirely of 1106 * whitespace characters. 1107 * 1108 * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J 1109 * 1110 * @param s 1111 * string to test. 1112 * @return <code>true</code> if the specified string consists entirely of 1113 * whitespace characters, <code>false</code> otherwise. 1114 */ isWhitespace(final String s)1115 public static boolean isWhitespace(final String s) { 1116 if (s == null) { 1117 throw new IllegalArgumentException("String may not be null"); 1118 } 1119 final int len = s.length(); 1120 for (int i = 0; i < len; i++) { 1121 if (!isWhitespace(s.charAt(i))) { 1122 return false; 1123 } 1124 } 1125 return true; 1126 } 1127 1128 /** 1129 * Determines if the VM supports encoding (chars to bytes) the 1130 * specified character set. NOTE: the given character set name may 1131 * not be known to the VM even if this method returns <code>true</code>. 1132 * Use {@link #toJavaCharset(String)} to get the canonical Java character 1133 * set name. 1134 * 1135 * @param charsetName the characters set name. 1136 * @return <code>true</code> if encoding is supported, <code>false</code> 1137 * otherwise. 1138 */ isEncodingSupported(String charsetName)1139 public static boolean isEncodingSupported(String charsetName) { 1140 return encodingSupported.contains(charsetName.toLowerCase(Locale.US)); 1141 } 1142 1143 /** 1144 * Determines if the VM supports decoding (bytes to chars) the 1145 * specified character set. NOTE: the given character set name may 1146 * not be known to the VM even if this method returns <code>true</code>. 1147 * Use {@link #toJavaCharset(String)} to get the canonical Java character 1148 * set name. 1149 * 1150 * @param charsetName the characters set name. 
1151 * @return <code>true</code> if decoding is supported, <code>false</code> 1152 * otherwise. 1153 */ isDecodingSupported(String charsetName)1154 public static boolean isDecodingSupported(String charsetName) { 1155 return decodingSupported.contains(charsetName.toLowerCase(Locale.US)); 1156 } 1157 1158 /** 1159 * Gets the preferred MIME character set name for the specified 1160 * character set or <code>null</code> if not known. 1161 * 1162 * @param charsetName the character set name to look for. 1163 * @return the MIME preferred name or <code>null</code> if not known. 1164 */ toMimeCharset(String charsetName)1165 public static String toMimeCharset(String charsetName) { 1166 Charset c = charsetMap.get(charsetName.toLowerCase(Locale.US)); 1167 if (c != null) { 1168 return c.mime; 1169 } 1170 return null; 1171 } 1172 1173 /** 1174 * Gets the canonical Java character set name for the specified 1175 * character set or <code>null</code> if not known. This should be 1176 * called before doing any conversions using the Java API. NOTE: 1177 * you must use {@link #isEncodingSupported(String)} or 1178 * {@link #isDecodingSupported(String)} to make sure the returned 1179 * Java character set is supported by the current VM. 1180 * 1181 * @param charsetName the character set name to look for. 1182 * @return the canonical Java name or <code>null</code> if not known. 
1183 */ toJavaCharset(String charsetName)1184 public static String toJavaCharset(String charsetName) { 1185 Charset c = charsetMap.get(charsetName.toLowerCase(Locale.US)); 1186 if (c != null) { 1187 return c.canonical; 1188 } 1189 return null; 1190 } 1191 getCharset(String charsetName)1192 public static java.nio.charset.Charset getCharset(String charsetName) { 1193 String defaultCharset = "ISO-8859-1"; 1194 1195 // Use the default chareset if given charset is null 1196 if(charsetName == null) charsetName = defaultCharset; 1197 1198 try { 1199 return java.nio.charset.Charset.forName(charsetName); 1200 } catch (IllegalCharsetNameException e) { 1201 log.info("Illegal charset " + charsetName + ", fallback to " + 1202 defaultCharset + ": " + e); 1203 // Use default charset on exception 1204 return java.nio.charset.Charset.forName(defaultCharset); 1205 } catch (UnsupportedCharsetException ex) { 1206 log.info("Unsupported charset " + charsetName + ", fallback to " + 1207 defaultCharset + ": " + ex); 1208 // Use default charset on exception 1209 return java.nio.charset.Charset.forName(defaultCharset); 1210 } 1211 1212 } 1213 /* 1214 * Uncomment the code below and run the main method to regenerate the 1215 * Javadoc table above when the known charsets change. 1216 */ 1217 1218 /* 1219 private static String dumpHtmlTable() { 1220 LinkedList l = new LinkedList(Arrays.asList(JAVA_CHARSETS)); 1221 Collections.sort(l); 1222 StringBuffer sb = new StringBuffer(); 1223 sb.append(" * <table>\n"); 1224 sb.append(" * <tr>\n"); 1225 sb.append(" * <td>Canonical (Java) name</td>\n"); 1226 sb.append(" * <td>MIME preferred</td>\n"); 1227 sb.append(" * <td>Aliases</td>\n"); 1228 sb.append(" * </tr>\n"); 1229 1230 for (Iterator it = l.iterator(); it.hasNext();) { 1231 Charset c = (Charset) it.next(); 1232 sb.append(" * <tr>\n"); 1233 sb.append(" * <td>" + c.canonical + "</td>\n"); 1234 sb.append(" * <td>" + (c.mime == null ? "?" 
: c.mime)+ "</td>\n");
            sb.append(" * <td>");
            for (int i = 0; c.aliases != null && i < c.aliases.length; i++) {
                sb.append(c.aliases[i] + " ");
            }
            sb.append("</td>\n");
            sb.append(" * </tr>\n");
        }
        sb.append(" * </table>\n");
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(dumpHtmlTable());
    }*/
}