1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27 package java.lang; 28 29 import java.io.UnsupportedEncodingException; 30 import java.lang.ref.SoftReference; 31 import java.nio.ByteBuffer; 32 import java.nio.CharBuffer; 33 import java.nio.charset.Charset; 34 import java.nio.charset.CharsetDecoder; 35 import java.nio.charset.CharsetEncoder; 36 import java.nio.charset.CharacterCodingException; 37 import java.nio.charset.CoderResult; 38 import java.nio.charset.CodingErrorAction; 39 import java.nio.charset.IllegalCharsetNameException; 40 import java.nio.charset.UnsupportedCharsetException; 41 import java.util.Arrays; 42 import sun.misc.MessageUtils; 43 import sun.nio.cs.HistoricallyNamedCharset; 44 import sun.nio.cs.ArrayDecoder; 45 import sun.nio.cs.ArrayEncoder; 46 47 /** 48 * Utility class for string encoding and decoding. 49 */ 50 51 class StringCoding { 52 StringCoding()53 private StringCoding() { } 54 55 /** The cached coders for each thread */ 56 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 57 new ThreadLocal<>(); 58 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 59 new ThreadLocal<>(); 60 61 private static boolean warnUnsupportedCharset = true; 62 deref(ThreadLocal<SoftReference<T>> tl)63 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 64 SoftReference<T> sr = tl.get(); 65 if (sr == null) 66 return null; 67 return sr.get(); 68 } 69 set(ThreadLocal<SoftReference<T>> tl, T ob)70 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 71 tl.set(new SoftReference<T>(ob)); 72 } 73 74 // Trim the given byte array to the given length 75 // safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted)76 private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { 77 78 /* ----- BEGIN android ----- 79 if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) 80 // Libcore tests expect a defensive copy in pretty much all cases. 81 // + System.getSecurityManager() == null is always true on android 82 */ 83 if (len == ba.length && (isTrusted)) 84 return ba; 85 else 86 return Arrays.copyOf(ba, len); 87 } 88 89 // Trim the given char array to the given length 90 // safeTrim(char[] ca, int len, Charset cs, boolean isTrusted)91 private static char[] safeTrim(char[] ca, int len, 92 Charset cs, boolean isTrusted) { 93 /* ----- BEGIN android ----- 94 if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) 95 // Libcore tests expect a defensive copy in pretty much all cases. 96 // + System.getSecurityManager() == null is always true on android 97 */ 98 if (len == ca.length && (isTrusted)) 99 return ca; 100 else 101 return Arrays.copyOf(ca, len); 102 } 103 scale(int len, float expansionFactor)104 private static int scale(int len, float expansionFactor) { 105 // We need to perform double, not float, arithmetic; otherwise 106 // we lose low order bits when len is larger than 2**24. 107 return (int)(len * (double)expansionFactor); 108 } 109 lookupCharset(String csn)110 private static Charset lookupCharset(String csn) { 111 if (Charset.isSupported(csn)) { 112 try { 113 return Charset.forName(csn); 114 } catch (UnsupportedCharsetException x) { 115 throw new Error(x); 116 } 117 } 118 return null; 119 } 120 warnUnsupportedCharset(String csn)121 private static void warnUnsupportedCharset(String csn) { 122 if (warnUnsupportedCharset) { 123 // Use sun.misc.MessageUtils rather than the Logging API or 124 // System.err since this method may be called during VM 125 // initialization before either is available. 126 MessageUtils.err("WARNING: Default charset " + csn + 127 " not supported, using ISO-8859-1 instead"); 128 warnUnsupportedCharset = false; 129 } 130 } 131 132 133 // -- Decoding -- 134 private static class StringDecoder { 135 private final String requestedCharsetName; 136 private final Charset cs; 137 private final CharsetDecoder cd; 138 private final boolean isTrusted; 139 StringDecoder(Charset cs, String rcn)140 private StringDecoder(Charset cs, String rcn) { 141 this.requestedCharsetName = rcn; 142 this.cs = cs; 143 this.cd = cs.newDecoder() 144 .onMalformedInput(CodingErrorAction.REPLACE) 145 .onUnmappableCharacter(CodingErrorAction.REPLACE); 146 this.isTrusted = (cs.getClass().getClassLoader() == null); 147 } 148 charsetName()149 String charsetName() { 150 if (cs instanceof HistoricallyNamedCharset) 151 return ((HistoricallyNamedCharset)cs).historicalName(); 152 return cs.name(); 153 } 154 requestedCharsetName()155 final String requestedCharsetName() { 156 return requestedCharsetName; 157 } 158 decode(byte[] ba, int off, int len)159 char[] decode(byte[] ba, int off, int len) { 160 int en = scale(len, cd.maxCharsPerByte()); 161 char[] ca = new char[en]; 162 if (len == 0) 163 return ca; 164 if (cd instanceof ArrayDecoder) { 165 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 166 return safeTrim(ca, clen, cs, isTrusted); 167 } else { 168 cd.reset(); 169 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 170 CharBuffer cb = CharBuffer.wrap(ca); 171 try { 172 CoderResult cr = cd.decode(bb, cb, true); 173 if (!cr.isUnderflow()) 174 cr.throwException(); 175 cr = cd.flush(cb); 176 if (!cr.isUnderflow()) 177 cr.throwException(); 178 } catch (CharacterCodingException x) { 179 // Substitution is always enabled, 180 // so this shouldn't happen 181 throw new Error(x); 182 } 183 return safeTrim(ca, cb.position(), cs, isTrusted); 184 } 185 } 186 } 187 decode(String charsetName, byte[] ba, int off, int len)188 static char[] decode(String charsetName, byte[] ba, int off, int len) 189 throws UnsupportedEncodingException 190 { 191 StringDecoder sd = deref(decoder); 192 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 193 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 194 || csn.equals(sd.charsetName()))) { 195 sd = null; 196 try { 197 Charset cs = lookupCharset(csn); 198 if (cs != null) 199 sd = new StringDecoder(cs, csn); 200 } catch (IllegalCharsetNameException x) {} 201 if (sd == null) 202 throw new UnsupportedEncodingException(csn); 203 set(decoder, sd); 204 } 205 return sd.decode(ba, off, len); 206 } 207 decode(Charset cs, byte[] ba, int off, int len)208 static char[] decode(Charset cs, byte[] ba, int off, int len) { 209 // (1)We never cache the "external" cs, the only benefit of creating 210 // an additional StringDe/Encoder object to wrap it is to share the 211 // de/encode() method. These SD/E objects are short-lifed, the young-gen 212 // gc should be able to take care of them well. But the best approash 213 // is still not to generate them if not really necessary. 214 // (2)The defensive copy of the input byte/char[] has a big performance 215 // impact, as well as the outgoing result byte/char[]. Need to do the 216 // optimization check of (sm==null && classLoader0==null) for both. 217 // (3)getClass().getClassLoader0() is expensive 218 // (4)There might be a timing gap in isTrusted setting. getClassLoader0() 219 // is only chcked (and then isTrusted gets set) when (SM==null). It is 220 // possible that the SM==null for now but then SM is NOT null later 221 // when safeTrim() is invoked...the "safe" way to do is to redundant 222 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim 223 // but it then can be argued that the SM is null when the opertaion 224 // is started... 225 CharsetDecoder cd = cs.newDecoder(); 226 int en = scale(len, cd.maxCharsPerByte()); 227 char[] ca = new char[en]; 228 if (len == 0) 229 return ca; 230 boolean isTrusted = false; 231 if (System.getSecurityManager() != null) { 232 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 233 ba = Arrays.copyOfRange(ba, off, off + len); 234 off = 0; 235 } 236 } 237 cd.onMalformedInput(CodingErrorAction.REPLACE) 238 .onUnmappableCharacter(CodingErrorAction.REPLACE) 239 .reset(); 240 if (cd instanceof ArrayDecoder) { 241 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 242 return safeTrim(ca, clen, cs, isTrusted); 243 } else { 244 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 245 CharBuffer cb = CharBuffer.wrap(ca); 246 try { 247 CoderResult cr = cd.decode(bb, cb, true); 248 if (!cr.isUnderflow()) 249 cr.throwException(); 250 cr = cd.flush(cb); 251 if (!cr.isUnderflow()) 252 cr.throwException(); 253 } catch (CharacterCodingException x) { 254 // Substitution is always enabled, 255 // so this shouldn't happen 256 throw new Error(x); 257 } 258 return safeTrim(ca, cb.position(), cs, isTrusted); 259 } 260 } 261 decode(byte[] ba, int off, int len)262 static char[] decode(byte[] ba, int off, int len) { 263 String csn = Charset.defaultCharset().name(); 264 try { 265 // use charset name decode() variant which provides caching. 266 return decode(csn, ba, off, len); 267 } catch (UnsupportedEncodingException x) { 268 warnUnsupportedCharset(csn); 269 } 270 try { 271 return decode("ISO-8859-1", ba, off, len); 272 } catch (UnsupportedEncodingException x) { 273 // If this code is hit during VM initialization, MessageUtils is 274 // the only way we will be able to get any kind of error message. 275 MessageUtils.err("ISO-8859-1 charset not available: " 276 + x.toString()); 277 // If we can not find ISO-8859-1 (a required encoding) then things 278 // are seriously wrong with the installation. 279 System.exit(1); 280 return null; 281 } 282 } 283 284 // -- Encoding -- 285 private static class StringEncoder { 286 private Charset cs; 287 private CharsetEncoder ce; 288 private final String requestedCharsetName; 289 private final boolean isTrusted; 290 StringEncoder(Charset cs, String rcn)291 private StringEncoder(Charset cs, String rcn) { 292 this.requestedCharsetName = rcn; 293 this.cs = cs; 294 this.ce = cs.newEncoder() 295 .onMalformedInput(CodingErrorAction.REPLACE) 296 .onUnmappableCharacter(CodingErrorAction.REPLACE); 297 this.isTrusted = (cs.getClass().getClassLoader() == null); 298 } 299 charsetName()300 String charsetName() { 301 if (cs instanceof HistoricallyNamedCharset) 302 return ((HistoricallyNamedCharset)cs).historicalName(); 303 return cs.name(); 304 } 305 requestedCharsetName()306 final String requestedCharsetName() { 307 return requestedCharsetName; 308 } 309 encode(char[] ca, int off, int len)310 byte[] encode(char[] ca, int off, int len) { 311 int en = scale(len, ce.maxBytesPerChar()); 312 if (len == 0) 313 return new byte[0]; 314 if (ce instanceof ArrayEncoder) { 315 byte[] ba = new byte[en]; 316 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 317 return safeTrim(ba, blen, cs, isTrusted); 318 } else { 319 ce.reset(); 320 CharBuffer cb = CharBuffer.wrap(ca, off, len); 321 try { 322 /* ----- BEGIN android ----- 323 CoderResult cr = ce.encode(cb, bb, true); 324 Pass read-only buffer, so the encoder can't alter it */ 325 ByteBuffer bb = ce.encode(cb.asReadOnlyBuffer()); 326 return safeTrim(bb.array(), bb.limit(), cs, isTrusted); 327 } catch (CharacterCodingException x) { 328 // Substitution is always enabled, 329 // so this shouldn't happen 330 throw new Error(x); 331 } 332 } 333 } 334 } 335 encode(String charsetName, char[] ca, int off, int len)336 static byte[] encode(String charsetName, char[] ca, int off, int len) 337 throws UnsupportedEncodingException 338 { 339 StringEncoder se = deref(encoder); 340 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 341 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 342 || csn.equals(se.charsetName()))) { 343 se = null; 344 try { 345 Charset cs = lookupCharset(csn); 346 if (cs != null) 347 se = new StringEncoder(cs, csn); 348 } catch (IllegalCharsetNameException x) {} 349 if (se == null) 350 throw new UnsupportedEncodingException (csn); 351 set(encoder, se); 352 } 353 return se.encode(ca, off, len); 354 } 355 encode(Charset cs, char[] ca, int off, int len)356 static byte[] encode(Charset cs, char[] ca, int off, int len) { 357 CharsetEncoder ce = cs.newEncoder(); 358 int en = scale(len, ce.maxBytesPerChar()); 359 byte[] ba = new byte[en]; 360 if (len == 0) 361 return ba; 362 boolean isTrusted = false; 363 if (System.getSecurityManager() != null) { 364 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 365 ca = Arrays.copyOfRange(ca, off, off + len); 366 off = 0; 367 } 368 } 369 ce.onMalformedInput(CodingErrorAction.REPLACE) 370 .onUnmappableCharacter(CodingErrorAction.REPLACE) 371 .reset(); 372 if (ce instanceof ArrayEncoder) { 373 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 374 return safeTrim(ba, blen, cs, isTrusted); 375 } else { 376 ByteBuffer bb = ByteBuffer.wrap(ba); 377 CharBuffer cb = CharBuffer.wrap(ca, off, len); 378 try { 379 /* ----- BEGIN android ----- 380 CoderResult cr = ce.encode(cb, bb, true); 381 Pass read-only buffer, so the encoder can't alter it */ 382 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); 383 if (!cr.isUnderflow()) 384 cr.throwException(); 385 cr = ce.flush(bb); 386 if (!cr.isUnderflow()) 387 cr.throwException(); 388 } catch (CharacterCodingException x) { 389 throw new Error(x); 390 } 391 return safeTrim(ba, bb.position(), cs, isTrusted); 392 } 393 } 394 encode(Charset cs, String str)395 static byte[] encode(Charset cs, String str) { 396 ByteBuffer buffer = cs.encode(str); 397 byte[] bytes = new byte[buffer.limit()]; 398 buffer.get(bytes); 399 return bytes; 400 } 401 encode(char[] ca, int off, int len)402 static byte[] encode(char[] ca, int off, int len) { 403 String csn = Charset.defaultCharset().name(); 404 try { 405 // use charset name encode() variant which provides caching. 406 return encode(csn, ca, off, len); 407 } catch (UnsupportedEncodingException x) { 408 warnUnsupportedCharset(csn); 409 } 410 try { 411 return encode("ISO-8859-1", ca, off, len); 412 } catch (UnsupportedEncodingException x) { 413 // If this code is hit during VM initialization, MessageUtils is 414 // the only way we will be able to get any kind of error message. 415 MessageUtils.err("ISO-8859-1 charset not available: " 416 + x.toString()); 417 // If we can not find ISO-8859-1 (a required encoding) then things 418 // are seriously wrong with the installation. 419 System.exit(1); 420 return null; 421 } 422 } 423 } 424