• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2001-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.commons.codec.net;
18 
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.util.BitSet;
22 
23 import org.apache.commons.codec.BinaryDecoder;
24 import org.apache.commons.codec.BinaryEncoder;
25 import org.apache.commons.codec.DecoderException;
26 import org.apache.commons.codec.EncoderException;
27 import org.apache.commons.codec.StringDecoder;
28 import org.apache.commons.codec.StringEncoder;
29 
30 /**
31  * <p>Implements the 'www-form-urlencoded' encoding scheme,
32  * also misleadingly known as URL encoding.</p>
33  *
34  * <p>For more detailed information please refer to
35  * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
36  * Chapter 17.13.4 'Form content types'</a> of the
37  * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a></p>
38  *
39  * <p>
40  * This codec is meant to be a replacement for standard Java classes
41  * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
42  * on older Java platforms, as these classes in Java versions below
43  * 1.4 rely on the platform's default charset encoding.
44  * </p>
45  *
46  * @author Apache Software Foundation
47  * @since 1.2
48  * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
49  */
50 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
51 
52     /**
53      * The default charset used for string decoding and encoding.
54      */
55     protected String charset = StringEncodings.UTF8;
56 
57     protected static byte ESCAPE_CHAR = '%';
58     /**
59      * BitSet of www-form-url safe characters.
60      */
61     protected static final BitSet WWW_FORM_URL = new BitSet(256);
62 
63     // Static initializer for www_form_url
64     static {
65         // alpha characters
66         for (int i = 'a'; i <= 'z'; i++) {
67             WWW_FORM_URL.set(i);
68         }
69         for (int i = 'A'; i <= 'Z'; i++) {
70             WWW_FORM_URL.set(i);
71         }
72         // numeric characters
73         for (int i = '0'; i <= '9'; i++) {
74             WWW_FORM_URL.set(i);
75         }
76         // special chars
77         WWW_FORM_URL.set('-');
78         WWW_FORM_URL.set('_');
79         WWW_FORM_URL.set('.');
80         WWW_FORM_URL.set('*');
81         // blank to be replaced with +
82         WWW_FORM_URL.set(' ');
83     }
84 
85 
86     /**
87      * Default constructor.
88      */
URLCodec()89     public URLCodec() {
90         super();
91     }
92 
93     /**
94      * Constructor which allows for the selection of a default charset
95      *
96      * @param charset the default string charset to use.
97      */
URLCodec(String charset)98     public URLCodec(String charset) {
99         super();
100         this.charset = charset;
101     }
102 
103     /**
104      * Encodes an array of bytes into an array of URL safe 7-bit
105      * characters. Unsafe characters are escaped.
106      *
107      * @param urlsafe bitset of characters deemed URL safe
108      * @param bytes array of bytes to convert to URL safe characters
109      * @return array of bytes containing URL safe characters
110      */
encodeUrl(BitSet urlsafe, byte[] bytes)111     public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
112     {
113         if (bytes == null) {
114             return null;
115         }
116         if (urlsafe == null) {
117             urlsafe = WWW_FORM_URL;
118         }
119 
120         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
121         for (int i = 0; i < bytes.length; i++) {
122             int b = bytes[i];
123             if (b < 0) {
124                 b = 256 + b;
125             }
126             if (urlsafe.get(b)) {
127                 if (b == ' ') {
128                     b = '+';
129                 }
130                 buffer.write(b);
131             } else {
132                 buffer.write('%');
133                 char hex1 = Character.toUpperCase(
134                   Character.forDigit((b >> 4) & 0xF, 16));
135                 char hex2 = Character.toUpperCase(
136                   Character.forDigit(b & 0xF, 16));
137                 buffer.write(hex1);
138                 buffer.write(hex2);
139             }
140         }
141         return buffer.toByteArray();
142     }
143 
144 
145     /**
146      * Decodes an array of URL safe 7-bit characters into an array of
147      * original bytes. Escaped characters are converted back to their
148      * original representation.
149      *
150      * @param bytes array of URL safe characters
151      * @return array of original bytes
152      * @throws DecoderException Thrown if URL decoding is unsuccessful
153      */
decodeUrl(byte[] bytes)154     public static final byte[] decodeUrl(byte[] bytes)
155          throws DecoderException
156     {
157         if (bytes == null) {
158             return null;
159         }
160         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
161         for (int i = 0; i < bytes.length; i++) {
162             int b = bytes[i];
163             if (b == '+') {
164                 buffer.write(' ');
165             } else if (b == '%') {
166                 try {
167                     int u = Character.digit((char)bytes[++i], 16);
168                     int l = Character.digit((char)bytes[++i], 16);
169                     if (u == -1 || l == -1) {
170                         throw new DecoderException("Invalid URL encoding");
171                     }
172                     buffer.write((char)((u << 4) + l));
173                 } catch(ArrayIndexOutOfBoundsException e) {
174                     throw new DecoderException("Invalid URL encoding");
175                 }
176             } else {
177                 buffer.write(b);
178             }
179         }
180         return buffer.toByteArray();
181     }
182 
183 
184     /**
185      * Encodes an array of bytes into an array of URL safe 7-bit
186      * characters. Unsafe characters are escaped.
187      *
188      * @param bytes array of bytes to convert to URL safe characters
189      * @return array of bytes containing URL safe characters
190      */
encode(byte[] bytes)191     public byte[] encode(byte[] bytes) {
192         return encodeUrl(WWW_FORM_URL, bytes);
193     }
194 
195 
196     /**
197      * Decodes an array of URL safe 7-bit characters into an array of
198      * original bytes. Escaped characters are converted back to their
199      * original representation.
200      *
201      * @param bytes array of URL safe characters
202      * @return array of original bytes
203      * @throws DecoderException Thrown if URL decoding is unsuccessful
204      */
decode(byte[] bytes)205     public byte[] decode(byte[] bytes) throws DecoderException {
206         return decodeUrl(bytes);
207     }
208 
209 
210     /**
211      * Encodes a string into its URL safe form using the specified
212      * string charset. Unsafe characters are escaped.
213      *
214      * @param pString string to convert to a URL safe form
215      * @param charset the charset for pString
216      * @return URL safe string
217      * @throws UnsupportedEncodingException Thrown if charset is not
218      *                                      supported
219      */
encode(String pString, String charset)220     public String encode(String pString, String charset)
221         throws UnsupportedEncodingException
222     {
223         if (pString == null) {
224             return null;
225         }
226         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
227     }
228 
229 
230     /**
231      * Encodes a string into its URL safe form using the default string
232      * charset. Unsafe characters are escaped.
233      *
234      * @param pString string to convert to a URL safe form
235      * @return URL safe string
236      * @throws EncoderException Thrown if URL encoding is unsuccessful
237      *
238      * @see #getDefaultCharset()
239      */
encode(String pString)240     public String encode(String pString) throws EncoderException {
241         if (pString == null) {
242             return null;
243         }
244         try {
245             return encode(pString, getDefaultCharset());
246         } catch(UnsupportedEncodingException e) {
247             throw new EncoderException(e.getMessage());
248         }
249     }
250 
251 
252     /**
253      * Decodes a URL safe string into its original form using the
254      * specified encoding. Escaped characters are converted back
255      * to their original representation.
256      *
257      * @param pString URL safe string to convert into its original form
258      * @param charset the original string charset
259      * @return original string
260      * @throws DecoderException Thrown if URL decoding is unsuccessful
261      * @throws UnsupportedEncodingException Thrown if charset is not
262      *                                      supported
263      */
decode(String pString, String charset)264     public String decode(String pString, String charset)
265         throws DecoderException, UnsupportedEncodingException
266     {
267         if (pString == null) {
268             return null;
269         }
270         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
271     }
272 
273 
274     /**
275      * Decodes a URL safe string into its original form using the default
276      * string charset. Escaped characters are converted back to their
277      * original representation.
278      *
279      * @param pString URL safe string to convert into its original form
280      * @return original string
281      * @throws DecoderException Thrown if URL decoding is unsuccessful
282      *
283      * @see #getDefaultCharset()
284      */
decode(String pString)285     public String decode(String pString) throws DecoderException {
286         if (pString == null) {
287             return null;
288         }
289         try {
290             return decode(pString, getDefaultCharset());
291         } catch(UnsupportedEncodingException e) {
292             throw new DecoderException(e.getMessage());
293         }
294     }
295 
296     /**
297      * Encodes an object into its URL safe form. Unsafe characters are
298      * escaped.
299      *
300      * @param pObject string to convert to a URL safe form
301      * @return URL safe object
302      * @throws EncoderException Thrown if URL encoding is not
303      *                          applicable to objects of this type or
304      *                          if encoding is unsuccessful
305      */
encode(Object pObject)306     public Object encode(Object pObject) throws EncoderException {
307         if (pObject == null) {
308             return null;
309         } else if (pObject instanceof byte[]) {
310             return encode((byte[])pObject);
311         } else if (pObject instanceof String) {
312             return encode((String)pObject);
313         } else {
314             throw new EncoderException("Objects of type " +
315                 pObject.getClass().getName() + " cannot be URL encoded");
316 
317         }
318     }
319 
320     /**
321      * Decodes a URL safe object into its original form. Escaped
322      * characters are converted back to their original representation.
323      *
324      * @param pObject URL safe object to convert into its original form
325      * @return original object
326      * @throws DecoderException Thrown if URL decoding is not
327      *                          applicable to objects of this type
328      *                          if decoding is unsuccessful
329      */
decode(Object pObject)330     public Object decode(Object pObject) throws DecoderException {
331         if (pObject == null) {
332             return null;
333         } else if (pObject instanceof byte[]) {
334             return decode((byte[])pObject);
335         } else if (pObject instanceof String) {
336             return decode((String)pObject);
337         } else {
338             throw new DecoderException("Objects of type " +
339                 pObject.getClass().getName() + " cannot be URL decoded");
340 
341         }
342     }
343 
344     /**
345      * The <code>String</code> encoding used for decoding and encoding.
346      *
347      * @return Returns the encoding.
348      *
349      * @deprecated use #getDefaultCharset()
350      */
getEncoding()351     public String getEncoding() {
352         return this.charset;
353     }
354 
355     /**
356      * The default charset used for string decoding and encoding.
357      *
358      * @return the default string charset.
359      */
getDefaultCharset()360     public String getDefaultCharset() {
361         return this.charset;
362     }
363 
364 }
365