• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.emailcommon.internet;
18 
19 import android.text.TextUtils;
20 import android.util.Base64;
21 import android.util.Base64DataException;
22 import android.util.Base64InputStream;
23 import android.util.Log;
24 
25 import com.android.emailcommon.mail.Body;
26 import com.android.emailcommon.mail.BodyPart;
27 import com.android.emailcommon.mail.Message;
28 import com.android.emailcommon.mail.MessagingException;
29 import com.android.emailcommon.mail.Multipart;
30 import com.android.emailcommon.mail.Part;
31 
32 import org.apache.commons.io.IOUtils;
33 import org.apache.james.mime4j.codec.EncoderUtil;
34 import org.apache.james.mime4j.decoder.DecoderUtil;
35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
36 import org.apache.james.mime4j.util.CharsetUtil;
37 
38 import java.io.ByteArrayOutputStream;
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.OutputStream;
42 import java.util.ArrayList;
43 import java.util.regex.Matcher;
44 import java.util.regex.Pattern;
45 
46 public class MimeUtility {
47     private static final String LOG_TAG = "Email";
48 
49     public static final String MIME_TYPE_RFC822 = "message/rfc822";
50     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
51 
52     /**
53      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
54      * object whenever possible.
55      */
unfold(String s)56     public static String unfold(String s) {
57         if (s == null) {
58             return null;
59         }
60         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
61         if (patternMatcher.find()) {
62             patternMatcher.reset();
63             s = patternMatcher.replaceAll("");
64         }
65         return s;
66     }
67 
decode(String s)68     public static String decode(String s) {
69         if (s == null) {
70             return null;
71         }
72         return DecoderUtil.decodeEncodedWords(s);
73     }
74 
unfoldAndDecode(String s)75     public static String unfoldAndDecode(String s) {
76         return decode(unfold(s));
77     }
78 
79     // TODO implement proper foldAndEncode
80     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
81     // duplication of encoding.
foldAndEncode(String s)82     public static String foldAndEncode(String s) {
83         return s;
84     }
85 
86     /**
87      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
88      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
89      * to other headers.
90      *
91      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
92      *
93      * @param s original string to encode and fold
94      * @param usedCharacters number of characters already used up by header name
95 
96      * @return the String ready to be transmitted
97      */
foldAndEncode2(String s, int usedCharacters)98     public static String foldAndEncode2(String s, int usedCharacters) {
99         // james.mime4j.codec.EncoderUtil.java
100         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
101         // Usage.TEXT_TOKENlooks like the right thing for subjects
102         // use WORD_ENTITY for address/names
103 
104         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
105                 usedCharacters);
106 
107         return fold(encoded, usedCharacters);
108     }
109 
110     /**
111      * INTERIM:  From newer version of org.apache.james (but we don't want to import
112      * the entire MimeUtil class).
113      *
114      * Splits the specified string into a multiple-line representation with
115      * lines no longer than 76 characters (because the line might contain
116      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
117      * 2047</a> section 2). If the string contains non-whitespace sequences
118      * longer than 76 characters a line break is inserted at the whitespace
119      * character following the sequence resulting in a line longer than 76
120      * characters.
121      *
122      * @param s
123      *            string to split.
124      * @param usedCharacters
125      *            number of characters already used up. Usually the number of
126      *            characters for header field name plus colon and one space.
127      * @return a multiple-line representation of the given string.
128      */
fold(String s, int usedCharacters)129     public static String fold(String s, int usedCharacters) {
130         final int maxCharacters = 76;
131 
132         final int length = s.length();
133         if (usedCharacters + length <= maxCharacters)
134             return s;
135 
136         StringBuilder sb = new StringBuilder();
137 
138         int lastLineBreak = -usedCharacters;
139         int wspIdx = indexOfWsp(s, 0);
140         while (true) {
141             if (wspIdx == length) {
142                 sb.append(s.substring(Math.max(0, lastLineBreak)));
143                 return sb.toString();
144             }
145 
146             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
147 
148             if (nextWspIdx - lastLineBreak > maxCharacters) {
149                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
150                 sb.append("\r\n");
151                 lastLineBreak = wspIdx;
152             }
153 
154             wspIdx = nextWspIdx;
155         }
156     }
157 
158     /**
159      * INTERIM:  From newer version of org.apache.james (but we don't want to import
160      * the entire MimeUtil class).
161      *
162      * Search for whitespace.
163      */
indexOfWsp(String s, int fromIndex)164     private static int indexOfWsp(String s, int fromIndex) {
165         final int len = s.length();
166         for (int index = fromIndex; index < len; index++) {
167             char c = s.charAt(index);
168             if (c == ' ' || c == '\t')
169                 return index;
170         }
171         return len;
172     }
173 
174     /**
175      * Returns the named parameter of a header field. If name is null the first
176      * parameter is returned, or if there are no additional parameters in the
177      * field the entire field is returned. Otherwise the named parameter is
178      * searched for in a case insensitive fashion and returned. If the parameter
179      * cannot be found the method returns null.
180      *
181      * TODO: quite inefficient with the inner trimming & splitting.
182      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
183      * TODO: The doc says that for a null name you get the first param, but you get the header.
184      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
185      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
186      *       ('+' -> ' ' conversion too? check RFC)
187      *
188      * @param header
189      * @param name
190      * @return the entire header (if name=null), the found parameter, or null
191      */
getHeaderParameter(String header, String name)192     public static String getHeaderParameter(String header, String name) {
193         if (header == null) {
194             return null;
195         }
196         String[] parts = unfold(header).split(";");
197         if (name == null) {
198             return parts[0].trim();
199         }
200         String lowerCaseName = name.toLowerCase();
201         for (String part : parts) {
202             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
203                 String[] parameterParts = part.split("=", 2);
204                 if (parameterParts.length < 2) {
205                     return null;
206                 }
207                 String parameter = parameterParts[1].trim();
208                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
209                     return parameter.substring(1, parameter.length() - 1);
210                 } else {
211                     return parameter;
212                 }
213             }
214         }
215         return null;
216     }
217 
findFirstPartByMimeType(Part part, String mimeType)218     public static Part findFirstPartByMimeType(Part part, String mimeType)
219             throws MessagingException {
220         if (part.getBody() instanceof Multipart) {
221             Multipart multipart = (Multipart)part.getBody();
222             for (int i = 0, count = multipart.getCount(); i < count; i++) {
223                 BodyPart bodyPart = multipart.getBodyPart(i);
224                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
225                 if (ret != null) {
226                     return ret;
227                 }
228             }
229         }
230         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
231             return part;
232         }
233         return null;
234     }
235 
findPartByContentId(Part part, String contentId)236     public static Part findPartByContentId(Part part, String contentId) throws Exception {
237         if (part.getBody() instanceof Multipart) {
238             Multipart multipart = (Multipart)part.getBody();
239             for (int i = 0, count = multipart.getCount(); i < count; i++) {
240                 BodyPart bodyPart = multipart.getBodyPart(i);
241                 Part ret = findPartByContentId(bodyPart, contentId);
242                 if (ret != null) {
243                     return ret;
244                 }
245             }
246         }
247         String cid = part.getContentId();
248         if (contentId.equals(cid)) {
249             return part;
250         }
251         return null;
252     }
253 
254     /**
255      * Reads the Part's body and returns a String based on any charset conversion that needed
256      * to be done.
257      * @param part The part containing a body
258      * @return a String containing the converted text in the body, or null if there was no text
259      * or an error during conversion.
260      */
getTextFromPart(Part part)261     public static String getTextFromPart(Part part) {
262         try {
263             if (part != null && part.getBody() != null) {
264                 InputStream in = part.getBody().getInputStream();
265                 String mimeType = part.getMimeType();
266                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
267                     /*
268                      * Now we read the part into a buffer for further processing. Because
269                      * the stream is now wrapped we'll remove any transfer encoding at this point.
270                      */
271                     ByteArrayOutputStream out = new ByteArrayOutputStream();
272                     IOUtils.copy(in, out);
273                     in.close();
274                     in = null;      // we want all of our memory back, and close might not release
275 
276                     /*
277                      * We've got a text part, so let's see if it needs to be processed further.
278                      */
279                     String charset = getHeaderParameter(part.getContentType(), "charset");
280                     if (charset != null) {
281                         /*
282                          * See if there is conversion from the MIME charset to the Java one.
283                          */
284                         charset = CharsetUtil.toJavaCharset(charset);
285                     }
286                     /*
287                      * No encoding, so use us-ascii, which is the standard.
288                      */
289                     if (charset == null) {
290                         charset = "ASCII";
291                     }
292                     /*
293                      * Convert and return as new String
294                      */
295                     String result = out.toString(charset);
296                     out.close();
297                     return result;
298                 }
299             }
300 
301         }
302         catch (OutOfMemoryError oom) {
303             /*
304              * If we are not able to process the body there's nothing we can do about it. Return
305              * null and let the upper layers handle the missing content.
306              */
307             Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
308         }
309         catch (Exception e) {
310             /*
311              * If we are not able to process the body there's nothing we can do about it. Return
312              * null and let the upper layers handle the missing content.
313              */
314             Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
315         }
316         return null;
317     }
318 
319     /**
320      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
321      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
322      *
323      * @param mimeType A MIME type to check.
324      * @param matchAgainst A MIME type to check against. May include wildcards.
325      * @return true if the mimeType matches
326      */
mimeTypeMatches(String mimeType, String matchAgainst)327     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
328         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
329                 Pattern.CASE_INSENSITIVE);
330         return p.matcher(mimeType).matches();
331     }
332 
333     /**
334      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
335      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
336      * (e.g. "image/*").
337      *
338      * @param mimeType A MIME type to check.
339      * @param matchAgainst An array of MIME types to check against. May include wildcards.
340      * @return true if the mimeType matches any of the matchAgainst strings
341      */
mimeTypeMatches(String mimeType, String[] matchAgainst)342     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
343         for (String matchType : matchAgainst) {
344             if (mimeTypeMatches(mimeType, matchType)) {
345                 return true;
346             }
347         }
348         return false;
349     }
350 
351     /**
352      * Given an input stream and a transfer encoding, return a wrapped input stream for that
353      * encoding (or the original if none is required)
354      * @param in the input stream
355      * @param contentTransferEncoding the content transfer encoding
356      * @return a properly wrapped stream
357      */
getInputStreamForContentTransferEncoding(InputStream in, String contentTransferEncoding)358     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
359             String contentTransferEncoding) {
360         if (contentTransferEncoding != null) {
361             contentTransferEncoding =
362                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
363             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
364                 in = new QuotedPrintableInputStream(in);
365             }
366             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
367                 in = new Base64InputStream(in, Base64.DEFAULT);
368             }
369         }
370         return in;
371     }
372 
373     /**
374      * Removes any content transfer encoding from the stream and returns a Body.
375      */
decodeBody(InputStream in, String contentTransferEncoding)376     public static Body decodeBody(InputStream in, String contentTransferEncoding)
377             throws IOException {
378         /*
379          * We'll remove any transfer encoding by wrapping the stream.
380          */
381         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
382         BinaryTempFileBody tempBody = new BinaryTempFileBody();
383         OutputStream out = tempBody.getOutputStream();
384         try {
385             IOUtils.copy(in, out);
386         } catch (Base64DataException bde) {
387             // TODO Need to fix this somehow
388             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
389             //out.write(warning.getBytes());
390         } finally {
391             out.close();
392         }
393         return tempBody;
394     }
395 
396     /**
397      * Recursively scan a Part (usually a Message) and sort out which of its children will be
398      * "viewable" and which will be attachments.
399      *
400      * @param part The part to be broken down
401      * @param viewables This arraylist will be populated with all parts that appear to be
402      * the "message" (e.g. text/plain & text/html)
403      * @param attachments This arraylist will be populated with all parts that appear to be
404      * attachments (including inlines)
405      * @throws MessagingException
406      */
collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)407     public static void collectParts(Part part, ArrayList<Part> viewables,
408             ArrayList<Part> attachments) throws MessagingException {
409         String disposition = part.getDisposition();
410         String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
411         // If a disposition is not specified, default to "inline"
412         boolean inline =
413                 TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
414         // The lower-case mime type
415         String mimeType = part.getMimeType().toLowerCase();
416 
417         if (part.getBody() instanceof Multipart) {
418             // If the part is Multipart but not alternative it's either mixed or
419             // something we don't know about, which means we treat it as mixed
420             // per the spec. We just process its pieces recursively.
421             MimeMultipart mp = (MimeMultipart)part.getBody();
422             boolean foundHtml = false;
423             if (mp.getSubTypeForTest().equals("alternative")) {
424                 for (int i = 0; i < mp.getCount(); i++) {
425                     if (mp.getBodyPart(i).isMimeType("text/html")) {
426                         foundHtml = true;
427                         break;
428                     }
429                 }
430             }
431             for (int i = 0; i < mp.getCount(); i++) {
432                 // See if we have text and html
433                 BodyPart bp = mp.getBodyPart(i);
434                 // If there's html, don't bother loading text
435                 if (foundHtml && bp.isMimeType("text/plain")) {
436                     continue;
437                 }
438                 collectParts(bp, viewables, attachments);
439             }
440         } else if (part.getBody() instanceof Message) {
441             // If the part is an embedded message we just continue to process
442             // it, pulling any viewables or attachments into the running list.
443             Message message = (Message)part.getBody();
444             collectParts(message, viewables, attachments);
445         } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
446             // We'll treat text and images as viewables
447             viewables.add(part);
448         } else {
449             // Everything else is an attachment.
450             attachments.add(part);
451         }
452     }
453 }
454