/*
 * Copyright (c) 1996, 2000, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 */

package java.text;

import java.lang.Character;

/**
 * Utility class for normalizing and merging patterns for collation.
/**
 * Utility class for normalizing and merging patterns for collation.
 * This is to be used with MergeCollation for adding patterns to an
 * existing rule table.
 *
 * <p>An entry consists of a strength (one of the {@link Collator} strength
 * constants, {@link #RESET} or {@link #UNSET}), the core characters, and an
 * optional extension (the text that follows {@code '/'} in a rule).
 *
 * @see MergeCollation
 * @author Mark Davis, Helena Shih
 */
class PatternEntry {

    /** Strength value used for a reset ({@code '&'}) entry. */
    static final int RESET = -2;

    /** Strength value meaning that no relation operator has been seen. */
    static final int UNSET = -1;

    // These fields are deliberately left package-private and non-final:
    // code outside this class may read them directly.
    int strength = UNSET;
    String chars = "";
    String extension = "";

    /**
     * Creates an entry from parsed pieces.
     *
     * @param strength  the strength of the relation
     * @param chars     the core characters; copied into a String
     * @param extension the extension characters; copied into a String, and
     *                  stored as the empty string when the buffer is empty
     */
    PatternEntry(int strength,
                 StringBuffer chars,
                 StringBuffer extension)
    {
        this.strength = strength;
        this.chars = chars.toString();
        this.extension = (extension.length() > 0) ? extension.toString()
                                                  : "";
    }

    /**
     * Appends the extension of this entry, quoted if necessary, to the
     * given buffer. Nothing is appended when the extension is empty.
     *
     * @param toAddTo the buffer to append to
     */
    public void appendQuotedExtension(StringBuffer toAddTo) {
        appendQuoted(extension, toAddTo);
    }

    /**
     * Appends the core characters of this entry, quoted if necessary, to
     * the given buffer.
     *
     * @param toAddTo the buffer to append to
     */
    public void appendQuotedChars(StringBuffer toAddTo) {
        appendQuoted(chars, toAddTo);
    }

    /**
     * WARNING this is used for searching in a Vector.
     * Because Vector.indexOf doesn't take a comparator,
     * this method is ill-defined and ignores strength.
     *
     * <p>Only the core characters are compared; strength and extension are
     * ignored.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code PatternEntry} with the
     *         same core characters; {@code false} for {@code null} and for
     *         objects of any other type
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // instanceof also rejects null, and prevents a ClassCastException
        // when the entry is compared with an unrelated object.
        if (!(obj instanceof PatternEntry)) {
            return false;
        }
        return chars.equals(((PatternEntry) obj).chars);
    }

    /**
     * Returns a hash code derived from the core characters only, so that it
     * agrees with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return chars.hashCode();
    }

    /**
     * For debugging. Renders the entry with its extension and without
     * white space.
     */
    @Override
    public String toString() {
        StringBuffer result = new StringBuffer();
        addToBuffer(result, true, false, null);
        return result.toString();
    }

    /**
     * Gets the strength of the entry.
     */
    final int getStrength() {
        return strength;
    }

    /**
     * Gets the expanding characters of the entry.
     */
    final String getExtension() {
        return extension;
    }

    /**
     * Gets the core characters of the entry.
     */
    final String getChars() {
        return chars;
    }

    // ===== privates =====

    /**
     * Appends the textual rule form of this entry to a buffer.
     *
     * @param toAddTo        the buffer to append to
     * @param showExtension  if {@code true}, a non-empty extension is
     *                       appended after a {@code '/'}
     * @param showWhiteSpace if {@code true}, separators are inserted: a line
     *                       break (for primary entries or when a reset is
     *                       emitted) or a space before the entry when the
     *                       buffer is not empty, and spaces around operators
     * @param lastEntry      if non-null, a reset ({@code '&'}) to that
     *                       entry's characters, followed by this entry's
     *                       extension, is emitted first
     * @throws IllegalStateException if the strength is not one of the known
     *         values; text appended before the check remains in the buffer
     */
    void addToBuffer(StringBuffer toAddTo,
                     boolean showExtension,
                     boolean showWhiteSpace,
                     PatternEntry lastEntry)
    {
        if (showWhiteSpace && toAddTo.length() > 0) {
            if (strength == Collator.PRIMARY || lastEntry != null) {
                toAddTo.append('\n');
            } else {
                toAddTo.append(' ');
            }
        }
        if (lastEntry != null) {
            toAddTo.append('&');
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
            lastEntry.appendQuotedChars(toAddTo);
            appendQuotedExtension(toAddTo);
            if (showWhiteSpace) {
                toAddTo.append(' ');
            }
        }
        final char operator = switch (strength) {
            case Collator.IDENTICAL -> '=';
            case Collator.TERTIARY -> ',';
            case Collator.SECONDARY -> ';';
            case Collator.PRIMARY -> '<';
            case RESET -> '&';
            case UNSET -> '?';

            default -> throw new IllegalStateException("Unexpected value: " + strength);
        };
        toAddTo.append(operator);

        if (showWhiteSpace) {
            toAddTo.append(' ');
        }
        appendQuoted(chars, toAddTo);
        if (showExtension && !extension.isEmpty()) {
            toAddTo.append('/');
            appendQuoted(extension, toAddTo);
        }
    }

    /**
     * Appends {@code chars} to the buffer, wrapping the whole string in
     * single quotes when its <em>first</em> character requires quoting.
     * Later characters are not inspected and embedded quotes are not
     * escaped. An empty string appends nothing.
     *
     * @param chars   the text to append; must not be {@code null}
     * @param toAddTo the buffer to append to
     */
    static void appendQuoted(String chars, StringBuffer toAddTo) {
        if (chars.isEmpty()) {
            // Nothing to emit; also avoids charAt(0) on an empty string.
            return;
        }
        boolean quote = needsQuoting(chars.charAt(0));
        if (quote) {
            toAddTo.append('\'');
        }
        toAddTo.append(chars);
        if (quote) {
            toAddTo.append('\'');
        }
    }

    /**
     * Tells whether a leading character forces the text to be quoted:
     * space characters, the special characters of
     * {@link #isSpecialChar(char)} (which include the apostrophe), a few
     * control characters, and {@code '@'}.
     */
    private static boolean needsQuoting(char ch) {
        if (Character.isSpaceChar(ch) || isSpecialChar(ch)) {
            return true;
        }
        switch (ch) {
        // NOTE(review): 0x0010 is kept exactly as found; it is unclear
        // whether another control character was intended.
        case 0x0010: case '\f': case '\r':
        case '\t': case '\n': case '@':
            return true;
        default:
            return false;
        }
    }

    //========================================================================
    // Parsing a pattern into a list of PatternEntries....
    //========================================================================

    /**
     * Splits a rule pattern into successive {@link PatternEntry} objects.
     * Each call to {@link #next()} continues where the previous one stopped.
     */
    static class Parser {
        private final String pattern;
        private int i;

        // We re-use these objects in order to improve performance
        private final StringBuffer newChars = new StringBuffer();
        private final StringBuffer newExtension = new StringBuffer();

        /**
         * Creates a parser positioned at the start of {@code pattern}.
         *
         * @param pattern the rule text to parse
         */
        public Parser(String pattern) {
            this.pattern = pattern;
            this.i = 0;
        }

        /**
         * Parses and returns the next entry of the pattern.
         *
         * <p>An entry starts with one of the operators {@code = , ; < &} and
         * runs up to (not including) the next unquoted operator. Unquoted
         * tab, newline, form feed, carriage return and space are skipped;
         * an unquoted {@code '/'} switches from core characters to the
         * extension. Text between single quotes is taken literally, and the
         * character immediately following an opening quote is always taken
         * literally, even if it is itself a quote.
         *
         * @return the next entry, or {@code null} if the rest of the pattern
         *         contains no operator
         * @throws ParseException if an ordinary character appears before any
         *         operator, if a special character appears unquoted, if an
         *         operator is not followed by any characters, or if the
         *         pattern ends directly after an opening quote
         */
        public PatternEntry next() throws ParseException {
            int newStrength = UNSET;

            newChars.setLength(0);
            newExtension.setLength(0);

            boolean inChars = true;
            boolean inQuote = false;
        mainLoop:
            while (i < pattern.length()) {
                char ch = pattern.charAt(i);
                if (inQuote) {
                    if (ch == '\'') {
                        inQuote = false;
                    } else {
                        appendLiteral(ch, inChars);
                    }
                } else {
                    switch (ch) {
                    case '=': case ',': case ';': case '<': case '&':
                        if (newStrength != UNSET) {
                            // This operator starts the following entry;
                            // leave i on it for the next call.
                            break mainLoop;
                        }
                        newStrength = strengthFor(ch);
                        break;
                    case '\t':
                    case '\n':
                    case '\f':
                    case '\r':
                    case ' ':
                        break; // skip whitespace TODO use Character
                    case '/':
                        inChars = false;
                        break;
                    case '\'':
                        if (i + 1 >= pattern.length()) {
                            // Report a dangling quote as a parse error
                            // rather than running off the end of the text.
                            throw new ParseException
                                ("Unterminated quote : " + snippetAt(i), i);
                        }
                        inQuote = true;
                        appendLiteral(pattern.charAt(++i), inChars);
                        break;
                    default:
                        if (newStrength == UNSET) {
                            throw new ParseException
                                ("missing char (=,;<&) : " + snippetAt(i), i);
                        }
                        if (PatternEntry.isSpecialChar(ch)) {
                            throw new ParseException
                                ("Unquoted punctuation character : "
                                 + Integer.toString(ch, 16), i);
                        }
                        if (inChars) {
                            newChars.append(ch);
                        } else {
                            newExtension.append(ch);
                        }
                        break;
                    }
                }
                i++;
            }
            if (newStrength == UNSET) {
                return null;
            }
            if (newChars.length() == 0) {
                throw new ParseException
                    ("missing chars (=,;<&): " + snippetAt(i), i);
            }

            return new PatternEntry(newStrength, newChars, newExtension);
        }

        /**
         * Appends a quoted character. It goes to the core characters while
         * they are being collected or are still empty, otherwise to the
         * extension.
         */
        private void appendLiteral(char ch, boolean inChars) {
            if (inChars || newChars.length() == 0) {
                newChars.append(ch);
            } else {
                newExtension.append(ch);
            }
        }

        /** Returns at most ten characters of the pattern starting at {@code from}. */
        private String snippetAt(int from) {
            return pattern.substring(from, Math.min(from + 10, pattern.length()));
        }

        /** Maps an operator character to the strength it denotes. */
        private static int strengthFor(char operator) {
            return switch (operator) {
                case '=' -> Collator.IDENTICAL;
                case ',' -> Collator.TERTIARY;
                case ';' -> Collator.SECONDARY;
                case '<' -> Collator.PRIMARY;
                case '&' -> RESET;
                default -> throw new IllegalArgumentException("Not an operator: " + operator);
            };
        }
    }

    /**
     * Tells whether {@code ch} is one of the characters that must be quoted
     * in a rule: the space, and the ranges U+0022..U+002F, U+003A..U+003F,
     * U+005B..U+0060 and U+007B..U+007E.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} lies in one of those ranges
     */
    static boolean isSpecialChar(char ch) {
        return ((ch == '\u0020') ||
                ((ch <= '\u002F') && (ch >= '\u0022')) ||
                ((ch <= '\u003F') && (ch >= '\u003A')) ||
                ((ch <= '\u0060') && (ch >= '\u005B')) ||
                ((ch <= '\u007E') && (ch >= '\u007B')));
    }
}