• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.lang;
28 
29 import java.io.UnsupportedEncodingException;
30 import java.lang.ref.SoftReference;
31 import java.nio.ByteBuffer;
32 import java.nio.CharBuffer;
33 import java.nio.charset.Charset;
34 import java.nio.charset.CharsetDecoder;
35 import java.nio.charset.CharsetEncoder;
36 import java.nio.charset.CharacterCodingException;
37 import java.nio.charset.CoderResult;
38 import java.nio.charset.CodingErrorAction;
39 import java.nio.charset.IllegalCharsetNameException;
40 import java.nio.charset.UnsupportedCharsetException;
41 import java.util.Arrays;
42 import sun.misc.MessageUtils;
43 import sun.nio.cs.HistoricallyNamedCharset;
44 import sun.nio.cs.ArrayDecoder;
45 import sun.nio.cs.ArrayEncoder;
46 
47 /**
48  * Utility class for string encoding and decoding.
49  */
50 
51 class StringCoding {
52 
/** Static-only utility class; the private constructor prevents outside instantiation. */
private StringCoding() {
}

/** Per-thread slot holding a softly referenced, previously used decoder. */
private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
    new ThreadLocal<>();

/** Per-thread slot holding a softly referenced, previously used encoder. */
private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
    new ThreadLocal<>();

/** Stays true until the unsupported-default-charset warning has been emitted. */
private static boolean warnUnsupportedCharset = true;
62 
deref(ThreadLocal<SoftReference<T>> tl)63     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
64         SoftReference<T> sr = tl.get();
65         if (sr == null)
66             return null;
67         return sr.get();
68     }
69 
set(ThreadLocal<SoftReference<T>> tl, T ob)70     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
71         tl.set(new SoftReference<T>(ob));
72     }
73 
74     // Trim the given byte array to the given length
75     //
safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted)76     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
77 
78         /* ----- BEGIN android -----
79         if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
80         // Libcore tests expect a defensive copy in pretty much all cases.
81         // + System.getSecurityManager() == null is always true on android
82         */
83         if (len == ba.length && (isTrusted))
84             return ba;
85         else
86             return Arrays.copyOf(ba, len);
87     }
88 
89     // Trim the given char array to the given length
90     //
safeTrim(char[] ca, int len, Charset cs, boolean isTrusted)91     private static char[] safeTrim(char[] ca, int len,
92                                    Charset cs, boolean isTrusted) {
93         /* ----- BEGIN android -----
94         if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
95         // Libcore tests expect a defensive copy in pretty much all cases.
96         // + System.getSecurityManager() == null is always true on android
97         */
98         if (len == ca.length && (isTrusted))
99             return ca;
100         else
101             return Arrays.copyOf(ca, len);
102     }
103 
scale(int len, float expansionFactor)104     private static int scale(int len, float expansionFactor) {
105         // We need to perform double, not float, arithmetic; otherwise
106         // we lose low order bits when len is larger than 2**24.
107         return (int)(len * (double)expansionFactor);
108     }
109 
lookupCharset(String csn)110     private static Charset lookupCharset(String csn) {
111         if (Charset.isSupported(csn)) {
112             try {
113                 return Charset.forName(csn);
114             } catch (UnsupportedCharsetException x) {
115                 throw new Error(x);
116             }
117         }
118         return null;
119     }
120 
warnUnsupportedCharset(String csn)121     private static void warnUnsupportedCharset(String csn) {
122         if (warnUnsupportedCharset) {
123             // Use sun.misc.MessageUtils rather than the Logging API or
124             // System.err since this method may be called during VM
125             // initialization before either is available.
126             MessageUtils.err("WARNING: Default charset " + csn +
127                              " not supported, using ISO-8859-1 instead");
128             warnUnsupportedCharset = false;
129         }
130     }
131 
132 
133     // -- Decoding --
134     private static class StringDecoder {
135         private final String requestedCharsetName;
136         private final Charset cs;
137         private final CharsetDecoder cd;
138         private final boolean isTrusted;
139 
StringDecoder(Charset cs, String rcn)140         private StringDecoder(Charset cs, String rcn) {
141             this.requestedCharsetName = rcn;
142             this.cs = cs;
143             this.cd = cs.newDecoder()
144                 .onMalformedInput(CodingErrorAction.REPLACE)
145                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
146             this.isTrusted = (cs.getClass().getClassLoader() == null);
147         }
148 
charsetName()149         String charsetName() {
150             if (cs instanceof HistoricallyNamedCharset)
151                 return ((HistoricallyNamedCharset)cs).historicalName();
152             return cs.name();
153         }
154 
requestedCharsetName()155         final String requestedCharsetName() {
156             return requestedCharsetName;
157         }
158 
decode(byte[] ba, int off, int len)159         char[] decode(byte[] ba, int off, int len) {
160             int en = scale(len, cd.maxCharsPerByte());
161             char[] ca = new char[en];
162             if (len == 0)
163                 return ca;
164             if (cd instanceof ArrayDecoder) {
165                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
166                 return safeTrim(ca, clen, cs, isTrusted);
167             } else {
168                 cd.reset();
169                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
170                 CharBuffer cb = CharBuffer.wrap(ca);
171                 try {
172                     CoderResult cr = cd.decode(bb, cb, true);
173                     if (!cr.isUnderflow())
174                         cr.throwException();
175                     cr = cd.flush(cb);
176                     if (!cr.isUnderflow())
177                         cr.throwException();
178                 } catch (CharacterCodingException x) {
179                     // Substitution is always enabled,
180                     // so this shouldn't happen
181                     throw new Error(x);
182                 }
183                 return safeTrim(ca, cb.position(), cs, isTrusted);
184             }
185         }
186     }
187 
decode(String charsetName, byte[] ba, int off, int len)188     static char[] decode(String charsetName, byte[] ba, int off, int len)
189         throws UnsupportedEncodingException
190     {
191         StringDecoder sd = deref(decoder);
192         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
193         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
194                               || csn.equals(sd.charsetName()))) {
195             sd = null;
196             try {
197                 Charset cs = lookupCharset(csn);
198                 if (cs != null)
199                     sd = new StringDecoder(cs, csn);
200             } catch (IllegalCharsetNameException x) {}
201             if (sd == null)
202                 throw new UnsupportedEncodingException(csn);
203             set(decoder, sd);
204         }
205         return sd.decode(ba, off, len);
206     }
207 
decode(Charset cs, byte[] ba, int off, int len)208     static char[] decode(Charset cs, byte[] ba, int off, int len) {
209         // (1)We never cache the "external" cs, the only benefit of creating
210         // an additional StringDe/Encoder object to wrap it is to share the
211         // de/encode() method. These SD/E objects are short-lifed, the young-gen
212         // gc should be able to take care of them well. But the best approash
213         // is still not to generate them if not really necessary.
214         // (2)The defensive copy of the input byte/char[] has a big performance
215         // impact, as well as the outgoing result byte/char[]. Need to do the
216         // optimization check of (sm==null && classLoader0==null) for both.
217         // (3)getClass().getClassLoader0() is expensive
218         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
219         // is only chcked (and then isTrusted gets set) when (SM==null). It is
220         // possible that the SM==null for now but then SM is NOT null later
221         // when safeTrim() is invoked...the "safe" way to do is to redundant
222         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
223         // but it then can be argued that the SM is null when the opertaion
224         // is started...
225         CharsetDecoder cd = cs.newDecoder();
226         int en = scale(len, cd.maxCharsPerByte());
227         char[] ca = new char[en];
228         if (len == 0)
229             return ca;
230         boolean isTrusted = false;
231         if (System.getSecurityManager() != null) {
232             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
233                 ba =  Arrays.copyOfRange(ba, off, off + len);
234                 off = 0;
235             }
236         }
237         cd.onMalformedInput(CodingErrorAction.REPLACE)
238           .onUnmappableCharacter(CodingErrorAction.REPLACE)
239           .reset();
240         if (cd instanceof ArrayDecoder) {
241             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
242             return safeTrim(ca, clen, cs, isTrusted);
243         } else {
244             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
245             CharBuffer cb = CharBuffer.wrap(ca);
246             try {
247                 CoderResult cr = cd.decode(bb, cb, true);
248                 if (!cr.isUnderflow())
249                     cr.throwException();
250                 cr = cd.flush(cb);
251                 if (!cr.isUnderflow())
252                     cr.throwException();
253             } catch (CharacterCodingException x) {
254                 // Substitution is always enabled,
255                 // so this shouldn't happen
256                 throw new Error(x);
257             }
258             return safeTrim(ca, cb.position(), cs, isTrusted);
259         }
260     }
261 
decode(byte[] ba, int off, int len)262     static char[] decode(byte[] ba, int off, int len) {
263         String csn = Charset.defaultCharset().name();
264         try {
265             // use charset name decode() variant which provides caching.
266             return decode(csn, ba, off, len);
267         } catch (UnsupportedEncodingException x) {
268             warnUnsupportedCharset(csn);
269         }
270         try {
271             return decode("ISO-8859-1", ba, off, len);
272         } catch (UnsupportedEncodingException x) {
273             // If this code is hit during VM initialization, MessageUtils is
274             // the only way we will be able to get any kind of error message.
275             MessageUtils.err("ISO-8859-1 charset not available: "
276                              + x.toString());
277             // If we can not find ISO-8859-1 (a required encoding) then things
278             // are seriously wrong with the installation.
279             System.exit(1);
280             return null;
281         }
282     }
283 
284     // -- Encoding --
285     private static class StringEncoder {
286         private Charset cs;
287         private CharsetEncoder ce;
288         private final String requestedCharsetName;
289         private final boolean isTrusted;
290 
StringEncoder(Charset cs, String rcn)291         private StringEncoder(Charset cs, String rcn) {
292             this.requestedCharsetName = rcn;
293             this.cs = cs;
294             this.ce = cs.newEncoder()
295                 .onMalformedInput(CodingErrorAction.REPLACE)
296                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
297             this.isTrusted = (cs.getClass().getClassLoader() == null);
298         }
299 
charsetName()300         String charsetName() {
301             if (cs instanceof HistoricallyNamedCharset)
302                 return ((HistoricallyNamedCharset)cs).historicalName();
303             return cs.name();
304         }
305 
requestedCharsetName()306         final String requestedCharsetName() {
307             return requestedCharsetName;
308         }
309 
encode(char[] ca, int off, int len)310         byte[] encode(char[] ca, int off, int len) {
311             int en = scale(len, ce.maxBytesPerChar());
312             if (len == 0)
313                 return new byte[0];
314             if (ce instanceof ArrayEncoder) {
315                 byte[] ba = new byte[en];
316                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
317                 return safeTrim(ba, blen, cs, isTrusted);
318             } else {
319                 ce.reset();
320                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
321                 try {
322                     /* ----- BEGIN android -----
323                     CoderResult cr = ce.encode(cb, bb, true);
324                     Pass read-only buffer, so the encoder can't alter it */
325                     ByteBuffer bb = ce.encode(cb.asReadOnlyBuffer());
326                     return safeTrim(bb.array(), bb.limit(), cs, isTrusted);
327                 } catch (CharacterCodingException x) {
328                     // Substitution is always enabled,
329                     // so this shouldn't happen
330                     throw new Error(x);
331                 }
332             }
333         }
334     }
335 
encode(String charsetName, char[] ca, int off, int len)336     static byte[] encode(String charsetName, char[] ca, int off, int len)
337         throws UnsupportedEncodingException
338     {
339         StringEncoder se = deref(encoder);
340         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
341         if ((se == null) || !(csn.equals(se.requestedCharsetName())
342                               || csn.equals(se.charsetName()))) {
343             se = null;
344             try {
345                 Charset cs = lookupCharset(csn);
346                 if (cs != null)
347                     se = new StringEncoder(cs, csn);
348             } catch (IllegalCharsetNameException x) {}
349             if (se == null)
350                 throw new UnsupportedEncodingException (csn);
351             set(encoder, se);
352         }
353         return se.encode(ca, off, len);
354     }
355 
encode(Charset cs, char[] ca, int off, int len)356     static byte[] encode(Charset cs, char[] ca, int off, int len) {
357         CharsetEncoder ce = cs.newEncoder();
358         int en = scale(len, ce.maxBytesPerChar());
359         byte[] ba = new byte[en];
360         if (len == 0)
361             return ba;
362         boolean isTrusted = false;
363         if (System.getSecurityManager() != null) {
364             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
365                 ca =  Arrays.copyOfRange(ca, off, off + len);
366                 off = 0;
367             }
368         }
369         ce.onMalformedInput(CodingErrorAction.REPLACE)
370           .onUnmappableCharacter(CodingErrorAction.REPLACE)
371           .reset();
372         if (ce instanceof ArrayEncoder) {
373             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
374             return safeTrim(ba, blen, cs, isTrusted);
375         } else {
376             ByteBuffer bb = ByteBuffer.wrap(ba);
377             CharBuffer cb = CharBuffer.wrap(ca, off, len);
378             try {
379                 /* ----- BEGIN android -----
380                    CoderResult cr = ce.encode(cb, bb, true);
381                    Pass read-only buffer, so the encoder can't alter it */
382                 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
383                 if (!cr.isUnderflow())
384                     cr.throwException();
385                 cr = ce.flush(bb);
386                 if (!cr.isUnderflow())
387                     cr.throwException();
388             } catch (CharacterCodingException x) {
389                 throw new Error(x);
390             }
391             return safeTrim(ba, bb.position(), cs, isTrusted);
392         }
393     }
394 
encode(Charset cs, String str)395     static byte[] encode(Charset cs, String str) {
396         ByteBuffer buffer = cs.encode(str);
397         byte[] bytes = new byte[buffer.limit()];
398         buffer.get(bytes);
399         return bytes;
400     }
401 
encode(char[] ca, int off, int len)402     static byte[] encode(char[] ca, int off, int len) {
403         String csn = Charset.defaultCharset().name();
404         try {
405             // use charset name encode() variant which provides caching.
406             return encode(csn, ca, off, len);
407         } catch (UnsupportedEncodingException x) {
408             warnUnsupportedCharset(csn);
409         }
410         try {
411             return encode("ISO-8859-1", ca, off, len);
412         } catch (UnsupportedEncodingException x) {
413             // If this code is hit during VM initialization, MessageUtils is
414             // the only way we will be able to get any kind of error message.
415             MessageUtils.err("ISO-8859-1 charset not available: "
416                              + x.toString());
417             // If we can not find ISO-8859-1 (a required encoding) then things
418             // are seriously wrong with the installation.
419             System.exit(1);
420             return null;
421         }
422     }
423 }
424