/*
 **********************************************************************
 * Copyright (c) 2005-2011, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: John Emmons
 **********************************************************************
 */
package org.unicode.cldr.posix;

import java.io.PrintWriter;
import java.nio.charset.Charset;

import org.unicode.cldr.icu.SimpleConverter;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.SupplementalDataInfo;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

/**
 * Class to generate POSIX format from CLDR.
 *
 * <p>The constructor loads the CLDR data for one locale and builds the six
 * POSIX locale categories; {@link #write(PrintWriter)} emits them, preceded
 * by a fixed header, to a writer.
 *
 * @author jcemmons
 */
public class POSIXLocale {

    /** Highest code point the repertoire-building loops will add (the original code's 0x00ffff cut-off). */
    private static final int MAX_REPERTOIRE_CODE_POINT = 0x00ffff;

    String locale_name;
    String codeset;
    POSIX_LCCtype lc_ctype;
    POSIX_LCCollate lc_collate;
    POSIX_LCNumeric lc_numeric;
    POSIX_LCMonetary lc_monetary;
    POSIX_LCTime lc_time;
    POSIX_LCMessages lc_messages;
    POSIXVariant variant;

    /**
     * Loads the CLDR data for {@code locale_name} and builds every POSIX category.
     *
     * <p>How the character repertoire is determined:
     * <ul>
     * <li>codeset is {@code "UTF-8"} and {@code repertoire} is empty: the repertoire is
     * seeded from the locale's exemplar characters and their case closure, widened to
     * every character sharing a script with one of them (COMMON, INHERITED, HAN and
     * invalid scripts are skipped), plus ASCII. The caller's set is filled in place.</li>
     * <li>codeset is {@code "UTF-8"} and {@code repertoire} is non-empty: the supplied set
     * is used as the starting point.</li>
     * <li>any other codeset: the supplied set is ignored and replaced by the characters
     * reported by {@link SimpleConverter} for {@code cs}; that set is also handed to
     * {@code POSIXUtilities.setRepertoire}.</li>
     * </ul>
     * Undefined code points are then removed. For UTF-8, once the collation category has
     * been built, the scripts of its tailored characters are added as well.
     *
     * @param locale_name CLDR locale identifier to load
     * @param repertoire  initial repertoire; modified in place in the UTF-8 cases
     * @param cs          charset passed to {@link SimpleConverter} when codeset is not UTF-8
     * @param codeset     codeset name; compared against the literal {@code "UTF-8"}
     * @param collateset  passed through to {@code POSIX_LCCollate}
     * @param variant     passed through to the category builders
     * @throws Exception  if loading CLDR data or building a category fails
     */
    public POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset,
        POSIXVariant variant) throws Exception {

        this.locale_name = locale_name;
        this.codeset = codeset;
        this.variant = variant;

        Factory mainFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
        Factory suppFactory = Factory.make(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY, ".*");
        Factory collFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*");
        CLDRFile doc = mainFactory.make(locale_name, true);
        SupplementalDataInfo supp = SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
        CLDRFile char_fallbk = suppFactory.make("characters", false);
        CLDRFile collrules = collFactory.makeWithFallback(locale_name);

        final boolean utf8 = codeset.equals("UTF-8");

        if (repertoire.isEmpty() && utf8) {
            // Generate the default repertoire set from the exemplar characters.
            UnicodeSet exemplars = new UnicodeSet(doc.getStringValue("//ldml/characters/exemplarCharacters"));
            addCodePoints(exemplars, repertoire);

            // closeOver() modifies and returns the same set, so 'exemplars' now holds the case closure.
            exemplars.closeOver(UnicodeSet.CASE);
            addCodePoints(exemplars, repertoire);

            addScriptsOf(repertoire, repertoire);

            repertoire.add(0x0000, 0x007f); // Always add the ASCII set
        } else if (!utf8) {
            UnicodeSet csset = new SimpleConverter(cs).getCharset();
            repertoire = new UnicodeSet(UnicodeSet.MIN_VALUE, UnicodeSet.MAX_VALUE).retainAll(csset);
            POSIXUtilities.setRepertoire(repertoire);
        }

        removeUndefined(repertoire);

        POSIXUtilities.setCharFallback(char_fallbk);

        lc_collate = new POSIX_LCCollate(doc, repertoire, collrules, collateset, codeset, variant);

        if (utf8) {
            // Add the tailored characters, and close over script
            addScriptsOf(lc_collate.col.getTailoredSet(), repertoire);
        }

        lc_ctype = new POSIX_LCCtype(doc, repertoire);
        lc_numeric = new POSIX_LCNumeric(doc);
        lc_monetary = new POSIX_LCMonetary(doc, supp, variant);
        lc_time = new POSIX_LCTime(doc, variant);
        lc_messages = new POSIX_LCMessages(doc, locale_name, variant);
    }

    /**
     * Tells whether the iterator's current element is a single code point that is
     * within the range the repertoire accepts (i.e. not a string element).
     */
    private static boolean isRepertoireCodePoint(UnicodeSetIterator it) {
        return it.codepoint != UnicodeSetIterator.IS_STRING && it.codepoint <= MAX_REPERTOIRE_CODE_POINT;
    }

    /**
     * Adds every in-range single code point of {@code source} to {@code target}.
     * {@code source} and {@code target} must be different sets, because
     * {@code target} is modified while {@code source} is being iterated.
     */
    private static void addCodePoints(UnicodeSet source, UnicodeSet target) {
        UnicodeSetIterator it = new UnicodeSetIterator(source);
        while (it.next()) {
            if (isRepertoireCodePoint(it)) {
                target.add(it.codepoint);
            }
        }
    }

    /**
     * For each in-range code point of {@code source}, adds to {@code target} all in-range
     * code points that have the same script. COMMON, INHERITED, HAN and invalid scripts
     * are skipped. {@code source} and {@code target} may be the same set.
     */
    private static void addScriptsOf(UnicodeSet source, UnicodeSet target) {
        // Iterate over a copy: UnicodeSetIterator does not check for concurrent
        // modification, and target (which may be source itself) grows inside the loop.
        UnicodeSetIterator it = new UnicodeSetIterator(new UnicodeSet(source));
        int previousScript = UScript.INVALID_CODE;
        while (it.next()) {
            if (!isRepertoireCodePoint(it)) {
                continue;
            }
            int script = UScript.getScript(it.codepoint);
            if (script == UScript.COMMON
                || script == UScript.INHERITED
                || script == UScript.INVALID_CODE
                || script == UScript.HAN
                || script == previousScript) { // skip runs of the same script to avoid repeating the work
                continue;
            }
            UnicodeSet wholeScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script);
            addCodePoints(wholeScript, target);
            previousScript = script;
        }
    }

    /**
     * Removes from {@code set}, in place, every single code point for which
     * {@link UCharacter#isDefined(int)} is false. String elements are left untouched.
     */
    private static void removeUndefined(UnicodeSet set) {
        // Iterate over a copy so that removals do not disturb the iteration.
        UnicodeSetIterator it = new UnicodeSetIterator(new UnicodeSet(set));
        while (it.next()) {
            if (it.codepoint != UnicodeSetIterator.IS_STRING && !UCharacter.isDefined(it.codepoint)) {
                set.remove(it.codepoint);
            }
        }
    }

    /**
     * Writes the locale definition: the comment/escape character declarations, a fixed
     * banner with the locale name, codeset and license text, then each category in the
     * order LC_CTYPE, LC_COLLATE, LC_NUMERIC, LC_MONETARY, LC_TIME, LC_MESSAGES.
     *
     * @param out destination writer; it is neither flushed nor closed here
     */
    public void write(PrintWriter out) {

        out.println("comment_char *");
        out.println("escape_char /");
        out.println("");
        out.println("*************************************************************************************************");
        out.println("* POSIX Locale *");
        out.println("* Generated automatically from the Unicode Character Database and Common Locale Data Repository *");
        out.println("* see http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html *");
        out.println("* Locale Name : " + locale_name + " Codeset : " + codeset);
        out.println("*************************************************************************************************");
        out.println("* Copyright 1991-2013 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in *");
        out.println("* http://www.unicode.org/copyright.html. *");
        out.println("* *");
        out.println("* Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode *");
        out.println("* data files and any associated documentation (the \"Data Files\") or Unicode software and any *");
        out.println("* associated documentation (the \"Software\") to deal in the Data Files or Software without *");
        out.println("* restriction, including without limitation the rights to use, copy, modify, merge, publish, *");
        out.println("* distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom *");
        out.println("* the Data Files or Software are furnished to do so, provided that (a) the above copyright *");
        out.println("* notice(s) and this permission notice appear with all copies of the Data Files or Software, *");
        out.println("* (b) both the above copyright notice(s) and this permission notice appear in associated *");
        out.println("* documentation, and (c) there is clear notice in each modified Data File or in the Software as *");
        out.println("* well as in the documentation associated with the Data File(s) or Software that the data or *");
        out.println("* software has been modified. *");
        out.println("* *");
        out.println("* THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *");
        out.println("* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A *");
        out.println("* PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT *");
        out.println("* HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR *");
        out.println("* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, *");
        out.println("* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN *");
        out.println("* CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. *");
        out.println("*************************************************************************************************");
        out.println("");

        lc_ctype.write(out);
        lc_collate.write(out);
        lc_numeric.write(out);
        lc_monetary.write(out);
        lc_time.write(out, variant);
        lc_messages.write(out);
    }
}