• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved.
3   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4   *
5   * This code is free software; you can redistribute it and/or modify it
6   * under the terms of the GNU General Public License version 2 only, as
7   * published by the Free Software Foundation.  Oracle designates this
8   * particular file as subject to the "Classpath" exception as provided
9   * by Oracle in the LICENSE file that accompanied this code.
10   *
11   * This code is distributed in the hope that it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14   * version 2 for more details (a copy is included in the LICENSE file that
15   * accompanied this code).
16   *
17   * You should have received a copy of the GNU General Public License version
18   * 2 along with this work; if not, write to the Free Software Foundation,
19   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20   *
21   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22   * or visit www.oracle.com if you need additional information or have any
23   * questions.
24   */
25  
26  /*
27   * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28   * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
29   *
30   *   The original version of this source code and documentation is copyrighted
31   * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32   * materials are provided under terms of a License Agreement between Taligent
33   * and Sun. This technology is protected by multiple US and International
34   * patents. This notice and attribution to Taligent may not be removed.
35   *   Taligent is a registered trademark of Taligent, Inc.
36   *
37   */
38  
39  package java.text;
40  
41  import java.util.ArrayList;
42  
43  /**
44   * Utility class for normalizing and merging patterns for collation.
45   * Patterns are strings of the form <entry>*, where <entry> has the
46   * form:
47   * <pattern> := <entry>*
48   * <entry> := <separator><chars>{"/"<extension>}
49   * <separator> := "=", ",", ";", "<", "&"
50   * <chars>, and <extension> are both arbitrary strings.
51   * unquoted whitespaces are ignored.
52   * 'xxx' can be used to quote characters
53   * One difference from Collator is that & is used to reset to a current
54   * point. Or, in other words, it introduces a new sequence which is to
55   * be added to the old.
56   * That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR
57   * "a < b < d & b < c"
58   * XXX: make '' be a single quote.
59   * @see PatternEntry
60   * @author             Mark Davis, Helena Shih
61   */
62  
63  final class MergeCollation {
64  
65      /**
66       * Creates from a pattern
67       * @exception ParseException If the input pattern is incorrect.
68       */
MergeCollation(String pattern)69      public MergeCollation(String pattern) throws ParseException
70      {
71          for (int i = 0; i < statusArray.length; i++)
72              statusArray[i] = 0;
73          setPattern(pattern);
74      }
75  
76      /**
77       * recovers current pattern
78       */
getPattern()79      public String getPattern() {
80          return getPattern(true);
81      }
82  
83      /**
84       * recovers current pattern.
85       * @param withWhiteSpace puts spacing around the entries, and \n
86       * before & and <
87       */
getPattern(boolean withWhiteSpace)88      public String getPattern(boolean withWhiteSpace) {
89          StringBuffer result = new StringBuffer();
90          PatternEntry tmp = null;
91          ArrayList<PatternEntry> extList = null;
92          int i;
93          for (i = 0; i < patterns.size(); ++i) {
94              PatternEntry entry = patterns.get(i);
95              if (entry.extension.length() != 0) {
96                  if (extList == null)
97                      extList = new ArrayList<>();
98                  extList.add(entry);
99              } else {
100                  if (extList != null) {
101                      PatternEntry last = findLastWithNoExtension(i-1);
102                      for (int j = extList.size() - 1; j >= 0 ; j--) {
103                          tmp = extList.get(j);
104                          tmp.addToBuffer(result, false, withWhiteSpace, last);
105                      }
106                      extList = null;
107                  }
108                  entry.addToBuffer(result, false, withWhiteSpace, null);
109              }
110          }
111          if (extList != null) {
112              PatternEntry last = findLastWithNoExtension(i-1);
113              for (int j = extList.size() - 1; j >= 0 ; j--) {
114                  tmp = extList.get(j);
115                  tmp.addToBuffer(result, false, withWhiteSpace, last);
116              }
117              extList = null;
118          }
119          return result.toString();
120      }
121  
findLastWithNoExtension(int i)122      private final PatternEntry findLastWithNoExtension(int i) {
123          for (--i;i >= 0; --i) {
124              PatternEntry entry = patterns.get(i);
125              if (entry.extension.length() == 0) {
126                  return entry;
127              }
128          }
129          return null;
130      }
131  
132      /**
133       * emits the pattern for collation builder.
134       * @return emits the string in the format understable to the collation
135       * builder.
136       */
emitPattern()137      public String emitPattern() {
138          return emitPattern(true);
139      }
140  
141      /**
142       * emits the pattern for collation builder.
143       * @param withWhiteSpace puts spacing around the entries, and \n
144       * before & and <
145       * @return emits the string in the format understable to the collation
146       * builder.
147       */
emitPattern(boolean withWhiteSpace)148      public String emitPattern(boolean withWhiteSpace) {
149          StringBuffer result = new StringBuffer();
150          for (int i = 0; i < patterns.size(); ++i)
151          {
152              PatternEntry entry = patterns.get(i);
153              if (entry != null) {
154                  entry.addToBuffer(result, true, withWhiteSpace, null);
155              }
156          }
157          return result.toString();
158      }
159  
160      /**
161       * sets the pattern.
162       */
setPattern(String pattern)163      public void setPattern(String pattern) throws ParseException
164      {
165          patterns.clear();
166          addPattern(pattern);
167      }
168  
169      /**
170       * adds a pattern to the current one.
171       * @param pattern the new pattern to be added
172       */
addPattern(String pattern)173      public void addPattern(String pattern) throws ParseException
174      {
175          if (pattern == null)
176              return;
177  
178          PatternEntry.Parser parser = new PatternEntry.Parser(pattern);
179  
180          PatternEntry entry = parser.next();
181          while (entry != null) {
182              fixEntry(entry);
183              entry = parser.next();
184          }
185      }
186  
187      /**
188       * gets count of separate entries
189       * @return the size of pattern entries
190       */
getCount()191      public int getCount() {
192          return patterns.size();
193      }
194  
195      /**
196       * gets count of separate entries
197       * @param index the offset of the desired pattern entry
198       * @return the requested pattern entry
199       */
getItemAt(int index)200      public PatternEntry getItemAt(int index) {
201          return patterns.get(index);
202      }
203  
204      //============================================================
205      // privates
206      //============================================================
207      ArrayList<PatternEntry> patterns = new ArrayList<>(); // a list of PatternEntries
208  
209      private transient PatternEntry saveEntry = null;
210      private transient PatternEntry lastEntry = null;
211  
212      // This is really used as a local variable inside fixEntry, but we cache
213      // it here to avoid newing it up every time the method is called.
214      private transient StringBuffer excess = new StringBuffer();
215  
216      //
217      // When building a MergeCollation, we need to do lots of searches to see
218      // whether a given entry is already in the table.  Since we're using an
219      // array, this would make the algorithm O(N*N).  To speed things up, we
220      // use this bit array to remember whether the array contains any entries
221      // starting with each Unicode character.  If not, we can avoid the search.
222      // Using BitSet would make this easier, but it's significantly slower.
223      //
224      private transient byte[] statusArray = new byte[8192];
225      private final byte BITARRAYMASK = (byte)0x1;
226      private final int  BYTEPOWER = 3;
227      private final int  BYTEMASK = (1 << BYTEPOWER) - 1;
228  
229      /*
230        If the strength is RESET, then just change the lastEntry to
231        be the current. (If the current is not in patterns, signal an error).
232        If not, then remove the current entry, and add it after lastEntry
233        (which is usually at the end).
234        */
fixEntry(PatternEntry newEntry)235      private final void fixEntry(PatternEntry newEntry) throws ParseException
236      {
237          // check to see whether the new entry has the same characters as the previous
238          // entry did (this can happen when a pattern declaring a difference between two
239          // strings that are canonically equivalent is normalized).  If so, and the strength
240          // is anything other than IDENTICAL or RESET, throw an exception (you can't
241          // declare a string to be unequal to itself).       --rtg 5/24/99
242          if (lastEntry != null && newEntry.chars.equals(lastEntry.chars)
243                  && newEntry.extension.equals(lastEntry.extension)) {
244              if (newEntry.strength != Collator.IDENTICAL
245                  && newEntry.strength != PatternEntry.RESET) {
246                      throw new ParseException("The entries " + lastEntry + " and "
247                              + newEntry + " are adjacent in the rules, but have conflicting "
248                              + "strengths: A character can't be unequal to itself.", -1);
249              } else {
250                  // otherwise, just skip this entry and behave as though you never saw it
251                  return;
252              }
253          }
254  
255          boolean changeLastEntry = true;
256          if (newEntry.strength != PatternEntry.RESET) {
257              int oldIndex = -1;
258  
259              if ((newEntry.chars.length() == 1)) {
260  
261                  char c = newEntry.chars.charAt(0);
262                  int statusIndex = c >> BYTEPOWER;
263                  byte bitClump = statusArray[statusIndex];
264                  byte setBit = (byte)(BITARRAYMASK << (c & BYTEMASK));
265  
266                  if (bitClump != 0 && (bitClump & setBit) != 0) {
267                      oldIndex = patterns.lastIndexOf(newEntry);
268                  } else {
269                      // We're going to add an element that starts with this
270                      // character, so go ahead and set its bit.
271                      statusArray[statusIndex] = (byte)(bitClump | setBit);
272                  }
273              } else {
274                  oldIndex = patterns.lastIndexOf(newEntry);
275              }
276              if (oldIndex != -1) {
277                  patterns.remove(oldIndex);
278              }
279  
280              excess.setLength(0);
281              int lastIndex = findLastEntry(lastEntry, excess);
282  
283              if (excess.length() != 0) {
284                  newEntry.extension = excess + newEntry.extension;
285                  if (lastIndex != patterns.size()) {
286                      lastEntry = saveEntry;
287                      changeLastEntry = false;
288                  }
289              }
290              if (lastIndex == patterns.size()) {
291                  patterns.add(newEntry);
292                  saveEntry = newEntry;
293              } else {
294                  patterns.add(lastIndex, newEntry);
295              }
296          }
297          if (changeLastEntry) {
298              lastEntry = newEntry;
299          }
300      }
301  
findLastEntry(PatternEntry entry, StringBuffer excessChars)302      private final int findLastEntry(PatternEntry entry,
303                                StringBuffer excessChars) throws ParseException
304      {
305          if (entry == null)
306              return 0;
307  
308          if (entry.strength != PatternEntry.RESET) {
309              // Search backwards for string that contains this one;
310              // most likely entry is last one
311  
312              int oldIndex = -1;
313              if ((entry.chars.length() == 1)) {
314                  int index = entry.chars.charAt(0) >> BYTEPOWER;
315                  if ((statusArray[index] &
316                      (BITARRAYMASK << (entry.chars.charAt(0) & BYTEMASK))) != 0) {
317                      oldIndex = patterns.lastIndexOf(entry);
318                  }
319              } else {
320                  oldIndex = patterns.lastIndexOf(entry);
321              }
322              if ((oldIndex == -1))
323                  throw new ParseException("couldn't find last entry: "
324                                            + entry, oldIndex);
325              return oldIndex + 1;
326          } else {
327              int i;
328              for (i = patterns.size() - 1; i >= 0; --i) {
329                  PatternEntry e = patterns.get(i);
330                  if (e.chars.regionMatches(0,entry.chars,0,
331                                                e.chars.length())) {
332                      excessChars.append(entry.chars.substring(e.chars.length(),
333                                                              entry.chars.length()));
334                      break;
335                  }
336              }
337              if (i == -1)
338                  throw new ParseException("couldn't find: " + entry, i);
339              return i + 1;
340          }
341      }
342  }
343