• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************
2  * Licensed to the Apache Software Foundation (ASF) under one   *
3  * or more contributor license agreements.  See the NOTICE file *
4  * distributed with this work for additional information        *
5  * regarding copyright ownership.  The ASF licenses this file   *
6  * to you under the Apache License, Version 2.0 (the            *
7  * "License"); you may not use this file except in compliance   *
8  * with the License.  You may obtain a copy of the License at   *
9  *                                                              *
10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
11  *                                                              *
12  * Unless required by applicable law or agreed to in writing,   *
13  * software distributed under the License is distributed on an  *
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15  * KIND, either express or implied.  See the License for the    *
16  * specific language governing permissions and limitations      *
17  * under the License.                                           *
18  ****************************************************************/
19 
20 package org.apache.james.mime4j.decoder;
21 
22 //BEGIN android-changed: Stubbing out logging
23 import org.apache.james.mime4j.Log;
24 import org.apache.james.mime4j.LogFactory;
25 //END android-changed
26 import org.apache.james.mime4j.util.CharsetUtil;
27 
28 import java.io.ByteArrayInputStream;
29 import java.io.ByteArrayOutputStream;
30 import java.io.IOException;
31 import java.io.UnsupportedEncodingException;
32 
33 /**
34  * Static methods for decoding strings, byte arrays and encoded words.
35  *
36  *
37  * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
38  */
39 public class DecoderUtil {
40     private static Log log = LogFactory.getLog(DecoderUtil.class);
41 
42     /**
43      * Decodes a string containing quoted-printable encoded data.
44      *
45      * @param s the string to decode.
46      * @return the decoded bytes.
47      */
decodeBaseQuotedPrintable(String s)48     public static byte[] decodeBaseQuotedPrintable(String s) {
49         ByteArrayOutputStream baos = new ByteArrayOutputStream();
50 
51         try {
52             byte[] bytes = s.getBytes("US-ASCII");
53 
54             QuotedPrintableInputStream is = new QuotedPrintableInputStream(
55                                                new ByteArrayInputStream(bytes));
56 
57             int b = 0;
58             while ((b = is.read()) != -1) {
59                 baos.write(b);
60             }
61         } catch (IOException e) {
62             /*
63              * This should never happen!
64              */
65             log.error(e);
66         }
67 
68         return baos.toByteArray();
69     }
70 
71     /**
72      * Decodes a string containing base64 encoded data.
73      *
74      * @param s the string to decode.
75      * @return the decoded bytes.
76      */
decodeBase64(String s)77     public static byte[] decodeBase64(String s) {
78         ByteArrayOutputStream baos = new ByteArrayOutputStream();
79 
80         try {
81             byte[] bytes = s.getBytes("US-ASCII");
82 
83             Base64InputStream is = new Base64InputStream(
84                                         new ByteArrayInputStream(bytes));
85 
86             int b = 0;
87             while ((b = is.read()) != -1) {
88                 baos.write(b);
89             }
90         } catch (IOException e) {
91             /*
92              * This should never happen!
93              */
94             log.error(e);
95         }
96 
97         return baos.toByteArray();
98     }
99 
100     /**
101      * Decodes an encoded word encoded with the 'B' encoding (described in
102      * RFC 2047) found in a header field body.
103      *
104      * @param encodedWord the encoded word to decode.
105      * @param charset the Java charset to use.
106      * @return the decoded string.
107      * @throws UnsupportedEncodingException if the given Java charset isn't
108      *         supported.
109      */
decodeB(String encodedWord, String charset)110     public static String decodeB(String encodedWord, String charset)
111             throws UnsupportedEncodingException {
112 
113         return new String(decodeBase64(encodedWord), charset);
114     }
115 
116     /**
117      * Decodes an encoded word encoded with the 'Q' encoding (described in
118      * RFC 2047) found in a header field body.
119      *
120      * @param encodedWord the encoded word to decode.
121      * @param charset the Java charset to use.
122      * @return the decoded string.
123      * @throws UnsupportedEncodingException if the given Java charset isn't
124      *         supported.
125      */
decodeQ(String encodedWord, String charset)126     public static String decodeQ(String encodedWord, String charset)
127             throws UnsupportedEncodingException {
128 
129         /*
130          * Replace _ with =20
131          */
132         StringBuffer sb = new StringBuffer();
133         for (int i = 0; i < encodedWord.length(); i++) {
134             char c = encodedWord.charAt(i);
135             if (c == '_') {
136                 sb.append("=20");
137             } else {
138                 sb.append(c);
139             }
140         }
141 
142         return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
143     }
144 
145     /**
146      * Decodes a string containing encoded words as defined by RFC 2047.
147      * Encoded words in have the form
148      * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
149      * quoted-printable and 'B' or 'b' for Base64.
150      *
151      * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
152      *
153      * @param body the string to decode.
154      * @return the decoded string.
155      */
decodeEncodedWords(String body)156     public static String decodeEncodedWords(String body) {
157 
158         // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
159         // object creation.  This could also be handled via lazy creation of the StringBuilder.
160         if (body.indexOf("=?") == -1) {
161             return body;
162         }
163 
164         int previousEnd = 0;
165         boolean previousWasEncoded = false;
166 
167         StringBuilder sb = new StringBuilder();
168 
169         while (true) {
170             int begin = body.indexOf("=?", previousEnd);
171 
172             // ANDROID:  The mime4j original version has an error here.  It gets confused if
173             // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
174             // to find the two '?' in the "header", before looking for the final "?=".
175             int endScan = begin + 2;
176             if (begin != -1) {
177                 int qm1 = body.indexOf('?', endScan + 2);
178                 int qm2 = body.indexOf('?', qm1 + 1);
179                 if (qm2 != -1) {
180                     endScan = qm2 + 1;
181                 }
182             }
183 
184             int end = begin == -1 ? -1 : body.indexOf("?=", endScan);
185             if (end == -1) {
186                 if (previousEnd == 0)
187                     return body;
188 
189                 sb.append(body.substring(previousEnd));
190                 return sb.toString();
191             }
192             end += 2;
193 
194             String sep = body.substring(previousEnd, begin);
195 
196             String decoded = decodeEncodedWord(body, begin, end);
197             if (decoded == null) {
198                 sb.append(sep);
199                 sb.append(body.substring(begin, end));
200             } else {
201                 if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
202                     sb.append(sep);
203                 }
204                 sb.append(decoded);
205             }
206 
207             previousEnd = end;
208             previousWasEncoded = decoded != null;
209         }
210     }
211 
212     // return null on error
decodeEncodedWord(String body, int begin, int end)213     private static String decodeEncodedWord(String body, int begin, int end) {
214         int qm1 = body.indexOf('?', begin + 2);
215         if (qm1 == end - 2)
216             return null;
217 
218         int qm2 = body.indexOf('?', qm1 + 1);
219         if (qm2 == end - 2)
220             return null;
221 
222         String mimeCharset = body.substring(begin + 2, qm1);
223         String encoding = body.substring(qm1 + 1, qm2);
224         String encodedText = body.substring(qm2 + 1, end - 2);
225 
226         String charset = CharsetUtil.toJavaCharset(mimeCharset);
227         if (charset == null) {
228             if (log.isWarnEnabled()) {
229                 log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
230                         + body.substring(begin, end) + "' doesn't have a "
231                         + "corresponding Java charset");
232             }
233             return null;
234         } else if (!CharsetUtil.isDecodingSupported(charset)) {
235             if (log.isWarnEnabled()) {
236                 log.warn("Current JDK doesn't support decoding of charset '"
237                         + charset + "' (MIME charset '" + mimeCharset
238                         + "' in encoded word '" + body.substring(begin, end)
239                         + "')");
240             }
241             return null;
242         }
243 
244         if (encodedText.length() == 0) {
245             if (log.isWarnEnabled()) {
246                 log.warn("Missing encoded text in encoded word: '"
247                         + body.substring(begin, end) + "'");
248             }
249             return null;
250         }
251 
252         try {
253             if (encoding.equalsIgnoreCase("Q")) {
254                 return DecoderUtil.decodeQ(encodedText, charset);
255             } else if (encoding.equalsIgnoreCase("B")) {
256                 return DecoderUtil.decodeB(encodedText, charset);
257             } else {
258                 if (log.isWarnEnabled()) {
259                     log.warn("Warning: Unknown encoding in encoded word '"
260                             + body.substring(begin, end) + "'");
261                 }
262                 return null;
263             }
264         } catch (UnsupportedEncodingException e) {
265             // should not happen because of isDecodingSupported check above
266             if (log.isWarnEnabled()) {
267                 log.warn("Unsupported encoding in encoded word '"
268                         + body.substring(begin, end) + "'", e);
269             }
270             return null;
271         } catch (RuntimeException e) {
272             if (log.isWarnEnabled()) {
273                 log.warn("Could not decode encoded word '"
274                         + body.substring(begin, end) + "'", e);
275             }
276             return null;
277         }
278     }
279 }
280