1 /* 2 ********************************************************************** 3 * Copyright (c) 2005-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: John Emmons 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.posix; 10 11 import java.io.PrintWriter; 12 import java.nio.charset.Charset; 13 14 import org.unicode.cldr.icu.SimpleConverter; 15 import org.unicode.cldr.util.CLDRFile; 16 import org.unicode.cldr.util.CLDRPaths; 17 import org.unicode.cldr.util.CldrUtility; 18 import org.unicode.cldr.util.Factory; 19 import org.unicode.cldr.util.SupplementalDataInfo; 20 21 import com.ibm.icu.lang.UCharacter; 22 import com.ibm.icu.lang.UProperty; 23 import com.ibm.icu.lang.UScript; 24 import com.ibm.icu.text.UnicodeSet; 25 import com.ibm.icu.text.UnicodeSetIterator; 26 27 /** 28 * Class to generate POSIX format from CLDR. 29 * 30 * @author jcemmons 31 */ 32 33 public class POSIXLocale { 34 35 String locale_name; 36 String codeset; 37 POSIX_LCCtype lc_ctype; 38 POSIX_LCCollate lc_collate; 39 POSIX_LCNumeric lc_numeric; 40 POSIX_LCMonetary lc_monetary; 41 POSIX_LCTime lc_time; 42 POSIX_LCMessages lc_messages; 43 POSIXVariant variant; 44 POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset, POSIXVariant variant)45 public POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset, 46 POSIXVariant variant) throws Exception { 47 48 this.locale_name = locale_name; 49 this.codeset = codeset; 50 this.variant = variant; 51 52 Factory mainFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); 53 Factory suppFactory = Factory.make(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY, ".*"); 54 Factory collFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*"); 55 CLDRFile doc = mainFactory.make(locale_name, true); 56 SupplementalDataInfo supp = SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 57 CLDRFile char_fallbk = suppFactory.make("characters", false); 58 CLDRFile collrules = collFactory.makeWithFallback(locale_name); 59 60 if (repertoire.isEmpty() && codeset.equals("UTF-8")) // Generate default repertoire set from exemplar 61 // characters; 62 { 63 String SearchLocation = "//ldml/characters/exemplarCharacters"; 64 UnicodeSet ExemplarCharacters = new UnicodeSet(doc.getStringValue(SearchLocation)); 65 UnicodeSetIterator ec = new UnicodeSetIterator(ExemplarCharacters); 66 while (ec.next()) { 67 if ((ec.codepoint != UnicodeSetIterator.IS_STRING) && (ec.codepoint <= 0x00ffff)) 68 repertoire.add(ec.codepoint); 69 } 70 UnicodeSet CaseFoldedExemplars = new UnicodeSet(ExemplarCharacters.closeOver(UnicodeSet.CASE)); 71 UnicodeSetIterator cfe = new UnicodeSetIterator(CaseFoldedExemplars); 72 while (cfe.next()) { 73 if ((cfe.codepoint != UnicodeSetIterator.IS_STRING) && (cfe.codepoint <= 0x00ffff)) 74 repertoire.add(cfe.codepoint); 75 } 76 77 UnicodeSetIterator it = new UnicodeSetIterator(repertoire); 78 int PreviousScript = UScript.INVALID_CODE; 79 while (it.next()) { 80 if ((it.codepoint != UnicodeSetIterator.IS_STRING) && (it.codepoint <= 0x00ffff)) { 81 int Script = UScript.getScript(it.codepoint); 82 if (Script != UScript.COMMON && 83 Script != UScript.INHERITED && 84 Script != UScript.INVALID_CODE && 85 Script != UScript.HAN && 86 Script != PreviousScript) // Hopefully this speeds up the process... 87 { 88 UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script); 89 UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript); 90 while (ts.next()) { 91 if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff)) 92 repertoire.add(ts.codepoint); 93 } 94 PreviousScript = Script; 95 } 96 } 97 } 98 99 repertoire.add(0x0000, 0x007f); // Always add the ASCII set 100 101 } else if (!codeset.equals("UTF-8")) { 102 UnicodeSet csset = new SimpleConverter(cs).getCharset(); 103 repertoire = new UnicodeSet(UnicodeSet.MIN_VALUE, UnicodeSet.MAX_VALUE).retainAll(csset); 104 POSIXUtilities.setRepertoire(repertoire); 105 } 106 107 UnicodeSetIterator rep = new UnicodeSetIterator(repertoire); 108 while (rep.next()) { 109 if (!UCharacter.isDefined(rep.codepoint) && (rep.codepoint != UnicodeSetIterator.IS_STRING)) 110 repertoire.remove(rep.codepoint); 111 } 112 113 POSIXUtilities.setCharFallback(char_fallbk); 114 115 lc_collate = new POSIX_LCCollate(doc, repertoire, collrules, collateset, codeset, variant); 116 117 if (codeset.equals("UTF-8")) { 118 UnicodeSet tailored = lc_collate.col.getTailoredSet(); 119 120 // Add the tailored characters, and close over script 121 122 UnicodeSetIterator it = new UnicodeSetIterator(tailored); 123 int PreviousScript = UScript.INVALID_CODE; 124 while (it.next()) { 125 if (it.codepoint != UnicodeSetIterator.IS_STRING && (it.codepoint <= 0x00ffff)) { 126 int Script = UScript.getScript(it.codepoint); 127 if (Script != UScript.COMMON && 128 Script != UScript.INHERITED && 129 Script != UScript.INVALID_CODE && 130 Script != UScript.HAN && 131 Script != PreviousScript) // Hopefully this speeds up the process... 132 { 133 UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script); 134 UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript); 135 while (ts.next()) { 136 if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff)) 137 repertoire.add(ts.codepoint); 138 } 139 PreviousScript = Script; 140 } 141 } 142 } 143 } 144 145 lc_ctype = new POSIX_LCCtype(doc, repertoire); 146 lc_numeric = new POSIX_LCNumeric(doc); 147 lc_monetary = new POSIX_LCMonetary(doc, supp, variant); 148 lc_time = new POSIX_LCTime(doc, variant); 149 lc_messages = new POSIX_LCMessages(doc, locale_name, variant); 150 151 } // end POSIXLocale ( String locale_name, String cldr_data_location ); 152 write(PrintWriter out)153 public void write(PrintWriter out) { 154 155 out.println("comment_char *"); 156 out.println("escape_char /"); 157 out.println(""); 158 out.println("*************************************************************************************************"); 159 out.println("* POSIX Locale *"); 160 out.println("* Generated automatically from the Unicode Character Database and Common Locale Data Repository *"); 161 out.println("* see http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html *"); 162 out.println("* Locale Name : " + locale_name + " Codeset : " + codeset); 163 out.println("*************************************************************************************************"); 164 out.println(CldrUtility.getCopyrightString("* ")); 165 166 lc_ctype.write(out); 167 lc_collate.write(out); 168 lc_numeric.write(out); 169 lc_monetary.write(out); 170 lc_time.write(out, variant); 171 lc_messages.write(out); 172 173 } // end write(PrintWriter out); 174 175 } 176