• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2005-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: John Emmons
7  **********************************************************************
8  */
9 package org.unicode.cldr.posix;
10 
11 import java.io.PrintWriter;
12 import java.nio.charset.Charset;
13 
14 import org.unicode.cldr.icu.SimpleConverter;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRPaths;
17 import org.unicode.cldr.util.Factory;
18 import org.unicode.cldr.util.SupplementalDataInfo;
19 
20 import com.ibm.icu.lang.UCharacter;
21 import com.ibm.icu.lang.UProperty;
22 import com.ibm.icu.lang.UScript;
23 import com.ibm.icu.text.UnicodeSet;
24 import com.ibm.icu.text.UnicodeSetIterator;
25 
26 /**
27  * Class to generate POSIX format from CLDR.
28  *
29  * @author jcemmons
30  */
31 
32 public class POSIXLocale {
33 
34     String locale_name;
35     String codeset;
36     POSIX_LCCtype lc_ctype;
37     POSIX_LCCollate lc_collate;
38     POSIX_LCNumeric lc_numeric;
39     POSIX_LCMonetary lc_monetary;
40     POSIX_LCTime lc_time;
41     POSIX_LCMessages lc_messages;
42     POSIXVariant variant;
43 
POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset, POSIXVariant variant)44     public POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset,
45         POSIXVariant variant) throws Exception {
46 
47         this.locale_name = locale_name;
48         this.codeset = codeset;
49         this.variant = variant;
50 
51         Factory mainFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
52         Factory suppFactory = Factory.make(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY, ".*");
53         Factory collFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*");
54         CLDRFile doc = mainFactory.make(locale_name, true);
55         SupplementalDataInfo supp = SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
56         CLDRFile char_fallbk = suppFactory.make("characters", false);
57         CLDRFile collrules = collFactory.makeWithFallback(locale_name);
58 
59         if (repertoire.isEmpty() && codeset.equals("UTF-8")) // Generate default repertoire set from exemplar
60         // characters;
61         {
62             String SearchLocation = "//ldml/characters/exemplarCharacters";
63             UnicodeSet ExemplarCharacters = new UnicodeSet(doc.getStringValue(SearchLocation));
64             UnicodeSetIterator ec = new UnicodeSetIterator(ExemplarCharacters);
65             while (ec.next()) {
66                 if ((ec.codepoint != UnicodeSetIterator.IS_STRING) && (ec.codepoint <= 0x00ffff))
67                     repertoire.add(ec.codepoint);
68             }
69             UnicodeSet CaseFoldedExemplars = new UnicodeSet(ExemplarCharacters.closeOver(UnicodeSet.CASE));
70             UnicodeSetIterator cfe = new UnicodeSetIterator(CaseFoldedExemplars);
71             while (cfe.next()) {
72                 if ((cfe.codepoint != UnicodeSetIterator.IS_STRING) && (cfe.codepoint <= 0x00ffff))
73                     repertoire.add(cfe.codepoint);
74             }
75 
76             UnicodeSetIterator it = new UnicodeSetIterator(repertoire);
77             int PreviousScript = UScript.INVALID_CODE;
78             while (it.next()) {
79                 if ((it.codepoint != UnicodeSetIterator.IS_STRING) && (it.codepoint <= 0x00ffff)) {
80                     int Script = UScript.getScript(it.codepoint);
81                     if (Script != UScript.COMMON &&
82                         Script != UScript.INHERITED &&
83                         Script != UScript.INVALID_CODE &&
84                         Script != UScript.HAN &&
85                         Script != PreviousScript) // Hopefully this speeds up the process...
86                     {
87                         UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script);
88                         UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript);
89                         while (ts.next()) {
90                             if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff))
91                                 repertoire.add(ts.codepoint);
92                         }
93                         PreviousScript = Script;
94                     }
95                 }
96             }
97 
98             repertoire.add(0x0000, 0x007f); // Always add the ASCII set
99 
100         } else if (!codeset.equals("UTF-8")) {
101             UnicodeSet csset = new SimpleConverter(cs).getCharset();
102             repertoire = new UnicodeSet(UnicodeSet.MIN_VALUE, UnicodeSet.MAX_VALUE).retainAll(csset);
103             POSIXUtilities.setRepertoire(repertoire);
104         }
105 
106         UnicodeSetIterator rep = new UnicodeSetIterator(repertoire);
107         while (rep.next()) {
108             if (!UCharacter.isDefined(rep.codepoint) && (rep.codepoint != UnicodeSetIterator.IS_STRING))
109                 repertoire.remove(rep.codepoint);
110         }
111 
112         POSIXUtilities.setCharFallback(char_fallbk);
113 
114         lc_collate = new POSIX_LCCollate(doc, repertoire, collrules, collateset, codeset, variant);
115 
116         if (codeset.equals("UTF-8")) {
117             UnicodeSet tailored = lc_collate.col.getTailoredSet();
118 
119             // Add the tailored characters, and close over script
120 
121             UnicodeSetIterator it = new UnicodeSetIterator(tailored);
122             int PreviousScript = UScript.INVALID_CODE;
123             while (it.next()) {
124                 if (it.codepoint != UnicodeSetIterator.IS_STRING && (it.codepoint <= 0x00ffff)) {
125                     int Script = UScript.getScript(it.codepoint);
126                     if (Script != UScript.COMMON &&
127                         Script != UScript.INHERITED &&
128                         Script != UScript.INVALID_CODE &&
129                         Script != UScript.HAN &&
130                         Script != PreviousScript) // Hopefully this speeds up the process...
131                     {
132                         UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script);
133                         UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript);
134                         while (ts.next()) {
135                             if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff))
136                                 repertoire.add(ts.codepoint);
137                         }
138                         PreviousScript = Script;
139                     }
140                 }
141             }
142         }
143 
144         lc_ctype = new POSIX_LCCtype(doc, repertoire);
145         lc_numeric = new POSIX_LCNumeric(doc);
146         lc_monetary = new POSIX_LCMonetary(doc, supp, variant);
147         lc_time = new POSIX_LCTime(doc, variant);
148         lc_messages = new POSIX_LCMessages(doc, locale_name, variant);
149 
150     } // end POSIXLocale ( String locale_name, String cldr_data_location );
151 
write(PrintWriter out)152     public void write(PrintWriter out) {
153 
154         out.println("comment_char *");
155         out.println("escape_char /");
156         out.println("");
157         out.println("*************************************************************************************************");
158         out.println("* POSIX Locale                                                                                  *");
159         out.println("* Generated automatically from the Unicode Character Database and Common Locale Data Repository *");
160         out.println("* see http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html                    *");
161         out.println("* Locale Name : " + locale_name + "   Codeset : " + codeset);
162         out.println("*************************************************************************************************");
163         out.println("* Copyright 1991-2013 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in  *");
164         out.println("* http://www.unicode.org/copyright.html.                                                        *");
165         out.println("*                                                                                               *");
166         out.println("* Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode   *");
167         out.println("* data files and any associated documentation (the \"Data Files\") or Unicode software and any    *");
168         out.println("* associated documentation (the \"Software\") to deal in the Data Files or Software without       *");
169         out.println("* restriction, including without limitation the rights to use, copy, modify, merge, publish,    *");
170         out.println("* distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom   *");
171         out.println("* the Data Files or Software are furnished to do so, provided that (a) the above copyright      *");
172         out.println("* notice(s) and this permission notice appear with all copies of the Data Files or Software,    *");
173         out.println("* (b) both the above copyright notice(s) and this permission notice appear in associated        *");
174         out.println("* documentation, and (c) there is clear notice in each modified Data File or in the Software as *");
175         out.println("* well as in the documentation associated with the Data File(s) or Software that the data or    *");
176         out.println("* software has been modified.                                                                   *");
177         out.println("*                                                                                               *");
178         out.println("* THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR    *");
179         out.println("* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A        *");
180         out.println("* PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT *");
181         out.println("* HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR *");
182         out.println("* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, *");
183         out.println("* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN   *");
184         out.println("* CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.                         *");
185         out.println("*************************************************************************************************");
186         out.println("");
187 
188         lc_ctype.write(out);
189         lc_collate.write(out);
190         lc_numeric.write(out);
191         lc_monetary.write(out);
192         lc_time.write(out, variant);
193         lc_messages.write(out);
194 
195     } // end write(PrintWriter out);
196 
197 }
198