• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.phone.common.mail.internet;
17 
18 import android.text.TextUtils;
19 import android.util.Base64;
20 import android.util.Base64DataException;
21 import android.util.Base64InputStream;
22 import android.util.Log;
23 
24 import com.android.phone.common.mail.Body;
25 import com.android.phone.common.mail.BodyPart;
26 import com.android.phone.common.mail.Message;
27 import com.android.phone.common.mail.MessagingException;
28 import com.android.phone.common.mail.Multipart;
29 import com.android.phone.common.mail.Part;
30 
31 import org.apache.commons.io.IOUtils;
32 import org.apache.james.mime4j.codec.EncoderUtil;
33 import org.apache.james.mime4j.decoder.DecoderUtil;
34 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
35 import org.apache.james.mime4j.util.CharsetUtil;
36 
37 import java.io.ByteArrayOutputStream;
38 import java.io.IOException;
39 import java.io.InputStream;
40 import java.io.OutputStream;
41 import java.util.ArrayList;
42 import java.util.regex.Matcher;
43 import java.util.regex.Pattern;
44 
45 public class MimeUtility {
46     private static final String LOG_TAG = "Email";
47 
48     public static final String MIME_TYPE_RFC822 = "message/rfc822";
49     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
50 
51     /**
52      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
53      * object whenever possible.
54      */
unfold(String s)55     public static String unfold(String s) {
56         if (s == null) {
57             return null;
58         }
59         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
60         if (patternMatcher.find()) {
61             patternMatcher.reset();
62             s = patternMatcher.replaceAll("");
63         }
64         return s;
65     }
66 
decode(String s)67     public static String decode(String s) {
68         if (s == null) {
69             return null;
70         }
71         return DecoderUtil.decodeEncodedWords(s);
72     }
73 
unfoldAndDecode(String s)74     public static String unfoldAndDecode(String s) {
75         return decode(unfold(s));
76     }
77 
78     // TODO implement proper foldAndEncode
79     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
80     // duplication of encoding.
foldAndEncode(String s)81     public static String foldAndEncode(String s) {
82         return s;
83     }
84 
85     /**
86      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
87      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
88      * to other headers.
89      *
90      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
91      *
92      * @param s original string to encode and fold
93      * @param usedCharacters number of characters already used up by header name
94 
95      * @return the String ready to be transmitted
96      */
foldAndEncode2(String s, int usedCharacters)97     public static String foldAndEncode2(String s, int usedCharacters) {
98         // james.mime4j.codec.EncoderUtil.java
99         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
100         // Usage.TEXT_TOKENlooks like the right thing for subjects
101         // use WORD_ENTITY for address/names
102 
103         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
104                 usedCharacters);
105 
106         return fold(encoded, usedCharacters);
107     }
108 
109     /**
110      * INTERIM:  From newer version of org.apache.james (but we don't want to import
111      * the entire MimeUtil class).
112      *
113      * Splits the specified string into a multiple-line representation with
114      * lines no longer than 76 characters (because the line might contain
115      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
116      * 2047</a> section 2). If the string contains non-whitespace sequences
117      * longer than 76 characters a line break is inserted at the whitespace
118      * character following the sequence resulting in a line longer than 76
119      * characters.
120      *
121      * @param s
122      *            string to split.
123      * @param usedCharacters
124      *            number of characters already used up. Usually the number of
125      *            characters for header field name plus colon and one space.
126      * @return a multiple-line representation of the given string.
127      */
fold(String s, int usedCharacters)128     public static String fold(String s, int usedCharacters) {
129         final int maxCharacters = 76;
130 
131         final int length = s.length();
132         if (usedCharacters + length <= maxCharacters)
133             return s;
134 
135         StringBuilder sb = new StringBuilder();
136 
137         int lastLineBreak = -usedCharacters;
138         int wspIdx = indexOfWsp(s, 0);
139         while (true) {
140             if (wspIdx == length) {
141                 sb.append(s.substring(Math.max(0, lastLineBreak)));
142                 return sb.toString();
143             }
144 
145             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
146 
147             if (nextWspIdx - lastLineBreak > maxCharacters) {
148                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
149                 sb.append("\r\n");
150                 lastLineBreak = wspIdx;
151             }
152 
153             wspIdx = nextWspIdx;
154         }
155     }
156 
157     /**
158      * INTERIM:  From newer version of org.apache.james (but we don't want to import
159      * the entire MimeUtil class).
160      *
161      * Search for whitespace.
162      */
indexOfWsp(String s, int fromIndex)163     private static int indexOfWsp(String s, int fromIndex) {
164         final int len = s.length();
165         for (int index = fromIndex; index < len; index++) {
166             char c = s.charAt(index);
167             if (c == ' ' || c == '\t')
168                 return index;
169         }
170         return len;
171     }
172 
173     /**
174      * Returns the named parameter of a header field. If name is null the first
175      * parameter is returned, or if there are no additional parameters in the
176      * field the entire field is returned. Otherwise the named parameter is
177      * searched for in a case insensitive fashion and returned. If the parameter
178      * cannot be found the method returns null.
179      *
180      * TODO: quite inefficient with the inner trimming & splitting.
181      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
182      * TODO: The doc says that for a null name you get the first param, but you get the header.
183      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
184      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
185      *       ('+' -> ' ' conversion too? check RFC)
186      *
187      * @param header
188      * @param name
189      * @return the entire header (if name=null), the found parameter, or null
190      */
getHeaderParameter(String header, String name)191     public static String getHeaderParameter(String header, String name) {
192         if (header == null) {
193             return null;
194         }
195         String[] parts = unfold(header).split(";");
196         if (name == null) {
197             return parts[0].trim();
198         }
199         String lowerCaseName = name.toLowerCase();
200         for (String part : parts) {
201             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
202                 String[] parameterParts = part.split("=", 2);
203                 if (parameterParts.length < 2) {
204                     return null;
205                 }
206                 String parameter = parameterParts[1].trim();
207                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
208                     return parameter.substring(1, parameter.length() - 1);
209                 } else {
210                     return parameter;
211                 }
212             }
213         }
214         return null;
215     }
216 
217     /**
218      * Reads the Part's body and returns a String based on any charset conversion that needed
219      * to be done.
220      * @param part The part containing a body
221      * @return a String containing the converted text in the body, or null if there was no text
222      * or an error during conversion.
223      */
getTextFromPart(Part part)224     public static String getTextFromPart(Part part) {
225         try {
226             if (part != null && part.getBody() != null) {
227                 InputStream in = part.getBody().getInputStream();
228                 String mimeType = part.getMimeType();
229                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
230                     /*
231                      * Now we read the part into a buffer for further processing. Because
232                      * the stream is now wrapped we'll remove any transfer encoding at this point.
233                      */
234                     ByteArrayOutputStream out = new ByteArrayOutputStream();
235                     IOUtils.copy(in, out);
236                     in.close();
237                     in = null;      // we want all of our memory back, and close might not release
238 
239                     /*
240                      * We've got a text part, so let's see if it needs to be processed further.
241                      */
242                     String charset = getHeaderParameter(part.getContentType(), "charset");
243                     if (charset != null) {
244                         /*
245                          * See if there is conversion from the MIME charset to the Java one.
246                          */
247                         charset = CharsetUtil.toJavaCharset(charset);
248                     }
249                     /*
250                      * No encoding, so use us-ascii, which is the standard.
251                      */
252                     if (charset == null) {
253                         charset = "ASCII";
254                     }
255                     /*
256                      * Convert and return as new String
257                      */
258                     String result = out.toString(charset);
259                     out.close();
260                     return result;
261                 }
262             }
263 
264         }
265         catch (OutOfMemoryError oom) {
266             /*
267              * If we are not able to process the body there's nothing we can do about it. Return
268              * null and let the upper layers handle the missing content.
269              */
270             Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
271         }
272         catch (Exception e) {
273             /*
274              * If we are not able to process the body there's nothing we can do about it. Return
275              * null and let the upper layers handle the missing content.
276              */
277             Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
278         }
279         return null;
280     }
281 
282     /**
283      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
284      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
285      *
286      * @param mimeType A MIME type to check.
287      * @param matchAgainst A MIME type to check against. May include wildcards.
288      * @return true if the mimeType matches
289      */
mimeTypeMatches(String mimeType, String matchAgainst)290     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
291         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
292                 Pattern.CASE_INSENSITIVE);
293         return p.matcher(mimeType).matches();
294     }
295 
296     /**
297      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
298      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
299      * (e.g. "image/*").
300      *
301      * @param mimeType A MIME type to check.
302      * @param matchAgainst An array of MIME types to check against. May include wildcards.
303      * @return true if the mimeType matches any of the matchAgainst strings
304      */
mimeTypeMatches(String mimeType, String[] matchAgainst)305     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
306         for (String matchType : matchAgainst) {
307             if (mimeTypeMatches(mimeType, matchType)) {
308                 return true;
309             }
310         }
311         return false;
312     }
313 
314     /**
315      * Given an input stream and a transfer encoding, return a wrapped input stream for that
316      * encoding (or the original if none is required)
317      * @param in the input stream
318      * @param contentTransferEncoding the content transfer encoding
319      * @return a properly wrapped stream
320      */
getInputStreamForContentTransferEncoding(InputStream in, String contentTransferEncoding)321     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
322             String contentTransferEncoding) {
323         if (contentTransferEncoding != null) {
324             contentTransferEncoding =
325                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
326             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
327                 in = new QuotedPrintableInputStream(in);
328             }
329             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
330                 in = new Base64InputStream(in, Base64.DEFAULT);
331             }
332         }
333         return in;
334     }
335 
336     /**
337      * Removes any content transfer encoding from the stream and returns a Body.
338      */
decodeBody(InputStream in, String contentTransferEncoding)339     public static Body decodeBody(InputStream in, String contentTransferEncoding)
340             throws IOException {
341         /*
342          * We'll remove any transfer encoding by wrapping the stream.
343          */
344         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
345         BinaryTempFileBody tempBody = new BinaryTempFileBody();
346         OutputStream out = tempBody.getOutputStream();
347         try {
348             IOUtils.copy(in, out);
349         } catch (Base64DataException bde) {
350             // TODO Need to fix this somehow
351             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
352             //out.write(warning.getBytes());
353         } finally {
354             out.close();
355         }
356         return tempBody;
357     }
358 
359     /**
360      * Recursively scan a Part (usually a Message) and sort out which of its children will be
361      * "viewable" and which will be attachments.
362      *
363      * @param part The part to be broken down
364      * @param viewables This arraylist will be populated with all parts that appear to be
365      * the "message" (e.g. text/plain & text/html)
366      * @param attachments This arraylist will be populated with all parts that appear to be
367      * attachments (including inlines)
368      * @throws MessagingException
369      */
collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)370     public static void collectParts(Part part, ArrayList<Part> viewables,
371             ArrayList<Part> attachments) throws MessagingException {
372         String disposition = part.getDisposition();
373         String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
374         // If a disposition is not specified, default to "inline"
375         boolean inline =
376                 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
377         // The lower-case mime type
378         String mimeType = part.getMimeType().toLowerCase();
379 
380         if (part.getBody() instanceof Multipart) {
381             // If the part is Multipart but not alternative it's either mixed or
382             // something we don't know about, which means we treat it as mixed
383             // per the spec. We just process its pieces recursively.
384             MimeMultipart mp = (MimeMultipart)part.getBody();
385             boolean foundHtml = false;
386             if (mp.getSubTypeForTest().equals("alternative")) {
387                 for (int i = 0; i < mp.getCount(); i++) {
388                     if (mp.getBodyPart(i).isMimeType("text/html")) {
389                         foundHtml = true;
390                         break;
391                     }
392                 }
393             }
394             for (int i = 0; i < mp.getCount(); i++) {
395                 // See if we have text and html
396                 BodyPart bp = mp.getBodyPart(i);
397                 // If there's html, don't bother loading text
398                 if (foundHtml && bp.isMimeType("text/plain")) {
399                     continue;
400                 }
401                 collectParts(bp, viewables, attachments);
402             }
403         } else if (part.getBody() instanceof Message) {
404             // If the part is an embedded message we just continue to process
405             // it, pulling any viewables or attachments into the running list.
406             Message message = (Message)part.getBody();
407             collectParts(message, viewables, attachments);
408         } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
409             // We'll treat text and images as viewables
410             viewables.add(part);
411         } else {
412             // Everything else is an attachment.
413             attachments.add(part);
414         }
415     }
416 }
417