• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2001-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.commons.codec.net;
18 
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.util.BitSet;
22 import org.apache.commons.codec.BinaryDecoder;
23 import org.apache.commons.codec.BinaryEncoder;
24 import org.apache.commons.codec.DecoderException;
25 import org.apache.commons.codec.EncoderException;
26 import org.apache.commons.codec.StringDecoder;
27 import org.apache.commons.codec.StringEncoder;
28 
29 /**
30  * <p>
31  * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
32  * </p>
33  * <p>
34  * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
35  * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
36  * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
37  * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
38  * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
39  * gateway.
40  * </p>
41  *
42  * <p>
43  * Note:
44  * </p>
45  * <p>
46  * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
47  * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec
48  * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
49  * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
50  * </p>
51  *
52  * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
53  *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
54  *
55  * @author Apache Software Foundation
56  * @since 1.3
57  * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $
58  */
59 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
60     /**
61      * The default charset used for string decoding and encoding.
62      */
63     private String charset = StringEncodings.UTF8;
64 
65     /**
66      * BitSet of printable characters as defined in RFC 1521.
67      */
68     private static final BitSet PRINTABLE_CHARS = new BitSet(256);
69 
70     private static byte ESCAPE_CHAR = '=';
71 
72     private static byte TAB = 9;
73 
74     private static byte SPACE = 32;
75     // Static initializer for printable chars collection
76     static {
77         // alpha characters
78         for (int i = 33; i <= 60; i++) {
79             PRINTABLE_CHARS.set(i);
80         }
81         for (int i = 62; i <= 126; i++) {
82             PRINTABLE_CHARS.set(i);
83         }
84         PRINTABLE_CHARS.set(TAB);
85         PRINTABLE_CHARS.set(SPACE);
86     }
87 
88     /**
89      * Default constructor.
90      */
QuotedPrintableCodec()91     public QuotedPrintableCodec() {
92         super();
93     }
94 
95     /**
96      * Constructor which allows for the selection of a default charset
97      *
98      * @param charset
99      *                  the default string charset to use.
100      */
QuotedPrintableCodec(String charset)101     public QuotedPrintableCodec(String charset) {
102         super();
103         this.charset = charset;
104     }
105 
106     /**
107      * Encodes byte into its quoted-printable representation.
108      *
109      * @param b
110      *                  byte to encode
111      * @param buffer
112      *                  the buffer to write to
113      */
encodeQuotedPrintable(int b, ByteArrayOutputStream buffer)114     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
115         buffer.write(ESCAPE_CHAR);
116         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
117         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
118         buffer.write(hex1);
119         buffer.write(hex2);
120     }
121 
122     /**
123      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
124      *
125      * <p>
126      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
127      * RFC 1521 and is suitable for encoding binary data and unformatted text.
128      * </p>
129      *
130      * @param printable
131      *                  bitset of characters deemed quoted-printable
132      * @param bytes
133      *                  array of bytes to be encoded
134      * @return array of bytes containing quoted-printable data
135      */
encodeQuotedPrintable(BitSet printable, byte[] bytes)136     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
137         if (bytes == null) {
138             return null;
139         }
140         if (printable == null) {
141             printable = PRINTABLE_CHARS;
142         }
143         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
144         for (int i = 0; i < bytes.length; i++) {
145             int b = bytes[i];
146             if (b < 0) {
147                 b = 256 + b;
148             }
149             if (printable.get(b)) {
150                 buffer.write(b);
151             } else {
152                 encodeQuotedPrintable(b, buffer);
153             }
154         }
155         return buffer.toByteArray();
156     }
157 
158     /**
159      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
160      * back to their original representation.
161      *
162      * <p>
163      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
164      * RFC 1521.
165      * </p>
166      *
167      * @param bytes
168      *                  array of quoted-printable characters
169      * @return array of original bytes
170      * @throws DecoderException
171      *                  Thrown if quoted-printable decoding is unsuccessful
172      */
decodeQuotedPrintable(byte[] bytes)173     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
174         if (bytes == null) {
175             return null;
176         }
177         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
178         for (int i = 0; i < bytes.length; i++) {
179             int b = bytes[i];
180             if (b == ESCAPE_CHAR) {
181                 try {
182                     int u = Character.digit((char) bytes[++i], 16);
183                     int l = Character.digit((char) bytes[++i], 16);
184                     if (u == -1 || l == -1) {
185                         throw new DecoderException("Invalid quoted-printable encoding");
186                     }
187                     buffer.write((char) ((u << 4) + l));
188                 } catch (ArrayIndexOutOfBoundsException e) {
189                     throw new DecoderException("Invalid quoted-printable encoding");
190                 }
191             } else {
192                 buffer.write(b);
193             }
194         }
195         return buffer.toByteArray();
196     }
197 
198     /**
199      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
200      *
201      * <p>
202      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
203      * RFC 1521 and is suitable for encoding binary data and unformatted text.
204      * </p>
205      *
206      * @param bytes
207      *                  array of bytes to be encoded
208      * @return array of bytes containing quoted-printable data
209      */
encode(byte[] bytes)210     public byte[] encode(byte[] bytes) {
211         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
212     }
213 
214     /**
215      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
216      * back to their original representation.
217      *
218      * <p>
219      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
220      * RFC 1521.
221      * </p>
222      *
223      * @param bytes
224      *                  array of quoted-printable characters
225      * @return array of original bytes
226      * @throws DecoderException
227      *                  Thrown if quoted-printable decoding is unsuccessful
228      */
decode(byte[] bytes)229     public byte[] decode(byte[] bytes) throws DecoderException {
230         return decodeQuotedPrintable(bytes);
231     }
232 
233     /**
234      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
235      *
236      * <p>
237      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
238      * RFC 1521 and is suitable for encoding binary data.
239      * </p>
240      *
241      * @param pString
242      *                  string to convert to quoted-printable form
243      * @return quoted-printable string
244      *
245      * @throws EncoderException
246      *                  Thrown if quoted-printable encoding is unsuccessful
247      *
248      * @see #getDefaultCharset()
249      */
encode(String pString)250     public String encode(String pString) throws EncoderException {
251         if (pString == null) {
252             return null;
253         }
254         try {
255             return encode(pString, getDefaultCharset());
256         } catch (UnsupportedEncodingException e) {
257             throw new EncoderException(e.getMessage());
258         }
259     }
260 
261     /**
262      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
263      * are converted back to their original representation.
264      *
265      * @param pString
266      *                  quoted-printable string to convert into its original form
267      * @param charset
268      *                  the original string charset
269      * @return original string
270      * @throws DecoderException
271      *                  Thrown if quoted-printable decoding is unsuccessful
272      * @throws UnsupportedEncodingException
273      *                  Thrown if charset is not supported
274      */
decode(String pString, String charset)275     public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
276         if (pString == null) {
277             return null;
278         }
279         return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
280     }
281 
282     /**
283      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
284      * converted back to their original representation.
285      *
286      * @param pString
287      *                  quoted-printable string to convert into its original form
288      * @return original string
289      * @throws DecoderException
290      *                  Thrown if quoted-printable decoding is unsuccessful
291      * @throws UnsupportedEncodingException
292      *                  Thrown if charset is not supported
293      * @see #getDefaultCharset()
294      */
decode(String pString)295     public String decode(String pString) throws DecoderException {
296         if (pString == null) {
297             return null;
298         }
299         try {
300             return decode(pString, getDefaultCharset());
301         } catch (UnsupportedEncodingException e) {
302             throw new DecoderException(e.getMessage());
303         }
304     }
305 
306     /**
307      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308      *
309      * @param pObject
310      *                  string to convert to a quoted-printable form
311      * @return quoted-printable object
312      * @throws EncoderException
313      *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314      *                  unsuccessful
315      */
encode(Object pObject)316     public Object encode(Object pObject) throws EncoderException {
317         if (pObject == null) {
318             return null;
319         } else if (pObject instanceof byte[]) {
320             return encode((byte[]) pObject);
321         } else if (pObject instanceof String) {
322             return encode((String) pObject);
323         } else {
324             throw new EncoderException("Objects of type "
325                 + pObject.getClass().getName()
326                 + " cannot be quoted-printable encoded");
327         }
328     }
329 
330     /**
331      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332      * representation.
333      *
334      * @param pObject
335      *                  quoted-printable object to convert into its original form
336      * @return original object
337      * @throws DecoderException
338      *                  Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
339      *                  unsuccessful
340      */
decode(Object pObject)341     public Object decode(Object pObject) throws DecoderException {
342         if (pObject == null) {
343             return null;
344         } else if (pObject instanceof byte[]) {
345             return decode((byte[]) pObject);
346         } else if (pObject instanceof String) {
347             return decode((String) pObject);
348         } else {
349             throw new DecoderException("Objects of type "
350                 + pObject.getClass().getName()
351                 + " cannot be quoted-printable decoded");
352         }
353     }
354 
355     /**
356      * Returns the default charset used for string decoding and encoding.
357      *
358      * @return the default string charset.
359      */
getDefaultCharset()360     public String getDefaultCharset() {
361         return this.charset;
362     }
363 
364     /**
365      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366      *
367      * <p>
368      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369      * RFC 1521 and is suitable for encoding binary data and unformatted text.
370      * </p>
371      *
372      * @param pString
373      *                  string to convert to quoted-printable form
374      * @param charset
375      *                  the charset for pString
376      * @return quoted-printable string
377      *
378      * @throws UnsupportedEncodingException
379      *                  Thrown if the charset is not supported
380      */
encode(String pString, String charset)381     public String encode(String pString, String charset) throws UnsupportedEncodingException {
382         if (pString == null) {
383             return null;
384         }
385         return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
386     }
387 }
388