// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package org.chromium.net;

import org.jni_zero.CalledByNative;
import org.jni_zero.JNINamespace;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.Normalizer;
import java.util.Locale;

/**
 * Utility functions for converting strings between formats when not built with
 * icu.
 *
 * <p>Every method is annotated {@code @CalledByNative} and reports failure by
 * returning {@code null} rather than by throwing.
 */
@JNINamespace("net::android")
public class NetStringUtil {
    /**
     * Attempts to convert text in a given character set to a Unicode string.
     * Conversion is strict: malformed or unmappable input is a failure.
     *
     * @param text ByteBuffer containing the character array to convert.
     * @param charsetName Name of the character set the text is encoded in.
     * @return Unicode string on success, null on failure.
     */
    @CalledByNative
    private static String convertToUnicode(ByteBuffer text, String charsetName) {
        try {
            Charset charset = Charset.forName(charsetName);
            CharsetDecoder decoder = charset.newDecoder();
            // On invalid characters, this will throw an exception.
            return decoder.decode(text).toString();
        } catch (Exception e) {
            // Deliberate catch-all: an unknown or illegal charset name, a null argument
            // and undecodable input are all reported to the caller as null.
            return null;
        }
    }

    /**
     * Attempts to convert text in a given character set to a Unicode string,
     * and normalize it to NFC.
     *
     * @param text ByteBuffer containing the character array to convert.
     * @param charsetName Name of the character set the text is encoded in.
     * @return Normalized Unicode string on success, null on failure.
     */
    @CalledByNative
    private static String convertToUnicodeAndNormalize(ByteBuffer text, String charsetName) {
        String unicodeString = convertToUnicode(text, charsetName);
        // Propagate a conversion failure instead of passing null to the normalizer.
        if (unicodeString == null) return null;
        return Normalizer.normalize(unicodeString, Normalizer.Form.NFC);
    }

    /**
     * Converts text in a given character set to a Unicode string. Any invalid
     * characters are replaced with U+FFFD instead of failing the conversion.
     *
     * @param text ByteBuffer containing the character array to convert.
     * @param charsetName Name of the character set the text is encoded in.
     * @return Unicode string on success; null if the character set is not
     *         recognized or any other exception is raised while decoding.
     */
    @CalledByNative
    private static String convertToUnicodeWithSubstitutions(ByteBuffer text, String charsetName) {
        try {
            Charset charset = Charset.forName(charsetName);

            // TODO(mmenke):  Investigate if Charset.decode() can be used
            // instead.  The question is whether it uses the proper replace
            // character.  JDK CharsetDecoder docs say U+FFFD is the default,
            // but Charset.decode() docs say it uses the "charset's default
            // replacement byte array".
            CharsetDecoder decoder = charset.newDecoder();
            // Substitute rather than report both kinds of decoding error.
            decoder.onMalformedInput(CodingErrorAction.REPLACE);
            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
            decoder.replaceWith("\uFFFD");
            return decoder.decode(text).toString();
        } catch (Exception e) {
            // Deliberate catch-all: every failure is reported to the caller as null.
            return null;
        }
    }

    /**
     * Converts a string to uppercase.
     *
     * @param str String to convert.
     * @return String converted to uppercase using the default locale, null
     *         on failure (including a null {@code str}).
     */
    @CalledByNative
    private static String toUpperCase(String str) {
        // Handle the expected null case explicitly instead of relying on a caught
        // NullPointerException.
        if (str == null) return null;
        try {
            return str.toUpperCase(Locale.getDefault());
        } catch (Exception e) {
            // Kept as a safety net so that no exception propagates out of this method.
            return null;
        }
    }
}