• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  * Copyright (c) 2005-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: John Emmons
7  **********************************************************************
8  */
9 package org.unicode.cldr.posix;
10 
11 import java.io.PrintWriter;
12 import java.nio.charset.Charset;
13 
14 import org.unicode.cldr.icu.SimpleConverter;
15 import org.unicode.cldr.util.CLDRFile;
16 import org.unicode.cldr.util.CLDRPaths;
17 import org.unicode.cldr.util.CldrUtility;
18 import org.unicode.cldr.util.Factory;
19 import org.unicode.cldr.util.SupplementalDataInfo;
20 
21 import com.ibm.icu.lang.UCharacter;
22 import com.ibm.icu.lang.UProperty;
23 import com.ibm.icu.lang.UScript;
24 import com.ibm.icu.text.UnicodeSet;
25 import com.ibm.icu.text.UnicodeSetIterator;
26 
27 /**
28  * Class to generate POSIX format from CLDR.
29  *
30  * @author jcemmons
31  */
32 
33 public class POSIXLocale {
34 
35     String locale_name;
36     String codeset;
37     POSIX_LCCtype lc_ctype;
38     POSIX_LCCollate lc_collate;
39     POSIX_LCNumeric lc_numeric;
40     POSIX_LCMonetary lc_monetary;
41     POSIX_LCTime lc_time;
42     POSIX_LCMessages lc_messages;
43     POSIXVariant variant;
44 
POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset, POSIXVariant variant)45     public POSIXLocale(String locale_name, UnicodeSet repertoire, Charset cs, String codeset, UnicodeSet collateset,
46         POSIXVariant variant) throws Exception {
47 
48         this.locale_name = locale_name;
49         this.codeset = codeset;
50         this.variant = variant;
51 
52         Factory mainFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*");
53         Factory suppFactory = Factory.make(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY, ".*");
54         Factory collFactory = Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*");
55         CLDRFile doc = mainFactory.make(locale_name, true);
56         SupplementalDataInfo supp = SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
57         CLDRFile char_fallbk = suppFactory.make("characters", false);
58         CLDRFile collrules = collFactory.makeWithFallback(locale_name);
59 
60         if (repertoire.isEmpty() && codeset.equals("UTF-8")) // Generate default repertoire set from exemplar
61         // characters;
62         {
63             String SearchLocation = "//ldml/characters/exemplarCharacters";
64             UnicodeSet ExemplarCharacters = new UnicodeSet(doc.getStringValue(SearchLocation));
65             UnicodeSetIterator ec = new UnicodeSetIterator(ExemplarCharacters);
66             while (ec.next()) {
67                 if ((ec.codepoint != UnicodeSetIterator.IS_STRING) && (ec.codepoint <= 0x00ffff))
68                     repertoire.add(ec.codepoint);
69             }
70             UnicodeSet CaseFoldedExemplars = new UnicodeSet(ExemplarCharacters.closeOver(UnicodeSet.CASE));
71             UnicodeSetIterator cfe = new UnicodeSetIterator(CaseFoldedExemplars);
72             while (cfe.next()) {
73                 if ((cfe.codepoint != UnicodeSetIterator.IS_STRING) && (cfe.codepoint <= 0x00ffff))
74                     repertoire.add(cfe.codepoint);
75             }
76 
77             UnicodeSetIterator it = new UnicodeSetIterator(repertoire);
78             int PreviousScript = UScript.INVALID_CODE;
79             while (it.next()) {
80                 if ((it.codepoint != UnicodeSetIterator.IS_STRING) && (it.codepoint <= 0x00ffff)) {
81                     int Script = UScript.getScript(it.codepoint);
82                     if (Script != UScript.COMMON &&
83                         Script != UScript.INHERITED &&
84                         Script != UScript.INVALID_CODE &&
85                         Script != UScript.HAN &&
86                         Script != PreviousScript) // Hopefully this speeds up the process...
87                     {
88                         UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script);
89                         UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript);
90                         while (ts.next()) {
91                             if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff))
92                                 repertoire.add(ts.codepoint);
93                         }
94                         PreviousScript = Script;
95                     }
96                 }
97             }
98 
99             repertoire.add(0x0000, 0x007f); // Always add the ASCII set
100 
101         } else if (!codeset.equals("UTF-8")) {
102             UnicodeSet csset = new SimpleConverter(cs).getCharset();
103             repertoire = new UnicodeSet(UnicodeSet.MIN_VALUE, UnicodeSet.MAX_VALUE).retainAll(csset);
104             POSIXUtilities.setRepertoire(repertoire);
105         }
106 
107         UnicodeSetIterator rep = new UnicodeSetIterator(repertoire);
108         while (rep.next()) {
109             if (!UCharacter.isDefined(rep.codepoint) && (rep.codepoint != UnicodeSetIterator.IS_STRING))
110                 repertoire.remove(rep.codepoint);
111         }
112 
113         POSIXUtilities.setCharFallback(char_fallbk);
114 
115         lc_collate = new POSIX_LCCollate(doc, repertoire, collrules, collateset, codeset, variant);
116 
117         if (codeset.equals("UTF-8")) {
118             UnicodeSet tailored = lc_collate.col.getTailoredSet();
119 
120             // Add the tailored characters, and close over script
121 
122             UnicodeSetIterator it = new UnicodeSetIterator(tailored);
123             int PreviousScript = UScript.INVALID_CODE;
124             while (it.next()) {
125                 if (it.codepoint != UnicodeSetIterator.IS_STRING && (it.codepoint <= 0x00ffff)) {
126                     int Script = UScript.getScript(it.codepoint);
127                     if (Script != UScript.COMMON &&
128                         Script != UScript.INHERITED &&
129                         Script != UScript.INVALID_CODE &&
130                         Script != UScript.HAN &&
131                         Script != PreviousScript) // Hopefully this speeds up the process...
132                     {
133                         UnicodeSet ThisScript = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, Script);
134                         UnicodeSetIterator ts = new UnicodeSetIterator(ThisScript);
135                         while (ts.next()) {
136                             if ((ts.codepoint != UnicodeSetIterator.IS_STRING) && (ts.codepoint <= 0x00ffff))
137                                 repertoire.add(ts.codepoint);
138                         }
139                         PreviousScript = Script;
140                     }
141                 }
142             }
143         }
144 
145         lc_ctype = new POSIX_LCCtype(doc, repertoire);
146         lc_numeric = new POSIX_LCNumeric(doc);
147         lc_monetary = new POSIX_LCMonetary(doc, supp, variant);
148         lc_time = new POSIX_LCTime(doc, variant);
149         lc_messages = new POSIX_LCMessages(doc, locale_name, variant);
150 
151     } // end POSIXLocale ( String locale_name, String cldr_data_location );
152 
write(PrintWriter out)153     public void write(PrintWriter out) {
154 
155         out.println("comment_char *");
156         out.println("escape_char /");
157         out.println("");
158         out.println("*************************************************************************************************");
159         out.println("* POSIX Locale                                                                                  *");
160         out.println("* Generated automatically from the Unicode Character Database and Common Locale Data Repository *");
161         out.println("* see http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html                    *");
162         out.println("* Locale Name : " + locale_name + "   Codeset : " + codeset);
163         out.println("*************************************************************************************************");
164         out.println(CldrUtility.getCopyrightString("* "));
165 
166         lc_ctype.write(out);
167         lc_collate.write(out);
168         lc_numeric.write(out);
169         lc_monetary.write(out);
170         lc_time.write(out, variant);
171         lc_messages.write(out);
172 
173     } // end write(PrintWriter out);
174 
175 }
176