1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.codec.binary; 19 20 import java.io.UnsupportedEncodingException; 21 22 import org.apache.commons.codec.CharEncoding; 23 24 /** 25 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a 26 * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 27 * 28 * @see CharEncoding 29 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 30 * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $ 31 * @since 1.4 32 */ 33 public class StringUtils { 34 35 /** 36 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 37 * byte array. 38 * 39 * @param string 40 * the String to encode 41 * @return encoded bytes 42 * @throws IllegalStateException 43 * Thrown when the charset is missing, which should be never according the the Java specification. 44 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 45 * @see #getBytesUnchecked(String, String) 46 */ getBytesIso8859_1(String string)47 public static byte[] getBytesIso8859_1(String string) { 48 return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1); 49 } 50 51 /** 52 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 53 * array. 54 * 55 * @param string 56 * the String to encode 57 * @return encoded bytes 58 * @throws IllegalStateException 59 * Thrown when the charset is missing, which should be never according the the Java specification. 60 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 61 * @see #getBytesUnchecked(String, String) 62 */ getBytesUsAscii(String string)63 public static byte[] getBytesUsAscii(String string) { 64 return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII); 65 } 66 67 /** 68 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 69 * array. 70 * 71 * @param string 72 * the String to encode 73 * @return encoded bytes 74 * @throws IllegalStateException 75 * Thrown when the charset is missing, which should be never according the the Java specification. 76 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 77 * @see #getBytesUnchecked(String, String) 78 */ getBytesUtf16(String string)79 public static byte[] getBytesUtf16(String string) { 80 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16); 81 } 82 83 /** 84 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 85 * array. 86 * 87 * @param string 88 * the String to encode 89 * @return encoded bytes 90 * @throws IllegalStateException 91 * Thrown when the charset is missing, which should be never according the the Java specification. 92 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 93 * @see #getBytesUnchecked(String, String) 94 */ getBytesUtf16Be(String string)95 public static byte[] getBytesUtf16Be(String string) { 96 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE); 97 } 98 99 /** 100 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 101 * array. 102 * 103 * @param string 104 * the String to encode 105 * @return encoded bytes 106 * @throws IllegalStateException 107 * Thrown when the charset is missing, which should be never according the the Java specification. 108 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 109 * @see #getBytesUnchecked(String, String) 110 */ getBytesUtf16Le(String string)111 public static byte[] getBytesUtf16Le(String string) { 112 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE); 113 } 114 115 /** 116 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 117 * array. 118 * 119 * @param string 120 * the String to encode 121 * @return encoded bytes 122 * @throws IllegalStateException 123 * Thrown when the charset is missing, which should be never according the the Java specification. 124 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 125 * @see #getBytesUnchecked(String, String) 126 */ getBytesUtf8(String string)127 public static byte[] getBytesUtf8(String string) { 128 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8); 129 } 130 131 /** 132 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 133 * array. 134 * <p> 135 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 136 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 137 * </p> 138 * 139 * @param string 140 * the String to encode 141 * @param charsetName 142 * The name of a required {@link java.nio.charset.Charset} 143 * @return encoded bytes 144 * @throws IllegalStateException 145 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 146 * required charset name. 147 * @see CharEncoding 148 * @see String#getBytes(String) 149 */ getBytesUnchecked(String string, String charsetName)150 public static byte[] getBytesUnchecked(String string, String charsetName) { 151 if (string == null) { 152 return null; 153 } 154 try { 155 return string.getBytes(charsetName); 156 } catch (UnsupportedEncodingException e) { 157 throw StringUtils.newIllegalStateException(charsetName, e); 158 } 159 } 160 newIllegalStateException(String charsetName, UnsupportedEncodingException e)161 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) { 162 return new IllegalStateException(charsetName + ": " + e); 163 } 164 165 /** 166 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 167 * <p> 168 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 169 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 170 * </p> 171 * 172 * @param bytes 173 * The bytes to be decoded into characters 174 * @param charsetName 175 * The name of a required {@link java.nio.charset.Charset} 176 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 177 * @throws IllegalStateException 178 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 179 * required charset name. 180 * @see CharEncoding 181 * @see String#String(byte[], String) 182 */ newString(byte[] bytes, String charsetName)183 public static String newString(byte[] bytes, String charsetName) { 184 if (bytes == null) { 185 return null; 186 } 187 try { 188 return new String(bytes, charsetName); 189 } catch (UnsupportedEncodingException e) { 190 throw StringUtils.newIllegalStateException(charsetName, e); 191 } 192 } 193 194 /** 195 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. 196 * 197 * @param bytes 198 * The bytes to be decoded into characters 199 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 200 * @throws IllegalStateException 201 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 202 * charset is required. 203 */ newStringIso8859_1(byte[] bytes)204 public static String newStringIso8859_1(byte[] bytes) { 205 return StringUtils.newString(bytes, CharEncoding.ISO_8859_1); 206 } 207 208 /** 209 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. 210 * 211 * @param bytes 212 * The bytes to be decoded into characters 213 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 214 * @throws IllegalStateException 215 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 216 * charset is required. 217 */ newStringUsAscii(byte[] bytes)218 public static String newStringUsAscii(byte[] bytes) { 219 return StringUtils.newString(bytes, CharEncoding.US_ASCII); 220 } 221 222 /** 223 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. 224 * 225 * @param bytes 226 * The bytes to be decoded into characters 227 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 228 * @throws IllegalStateException 229 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 230 * charset is required. 231 */ newStringUtf16(byte[] bytes)232 public static String newStringUtf16(byte[] bytes) { 233 return StringUtils.newString(bytes, CharEncoding.UTF_16); 234 } 235 236 /** 237 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. 238 * 239 * @param bytes 240 * The bytes to be decoded into characters 241 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 242 * @throws IllegalStateException 243 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 244 * charset is required. 245 */ newStringUtf16Be(byte[] bytes)246 public static String newStringUtf16Be(byte[] bytes) { 247 return StringUtils.newString(bytes, CharEncoding.UTF_16BE); 248 } 249 250 /** 251 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. 252 * 253 * @param bytes 254 * The bytes to be decoded into characters 255 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 256 * @throws IllegalStateException 257 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 258 * charset is required. 259 */ newStringUtf16Le(byte[] bytes)260 public static String newStringUtf16Le(byte[] bytes) { 261 return StringUtils.newString(bytes, CharEncoding.UTF_16LE); 262 } 263 264 /** 265 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. 266 * 267 * @param bytes 268 * The bytes to be decoded into characters 269 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 270 * @throws IllegalStateException 271 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 272 * charset is required. 273 */ newStringUtf8(byte[] bytes)274 public static String newStringUtf8(byte[] bytes) { 275 return StringUtils.newString(bytes, CharEncoding.UTF_8); 276 } 277 278 } 279