• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.android.emailcommon.internet;
18 
19 import android.util.Base64;
20 import android.util.Base64DataException;
21 import android.util.Base64InputStream;
22 import android.util.Log;
23 
24 import com.android.emailcommon.Logging;
25 import com.android.emailcommon.mail.Body;
26 import com.android.emailcommon.mail.BodyPart;
27 import com.android.emailcommon.mail.Message;
28 import com.android.emailcommon.mail.MessagingException;
29 import com.android.emailcommon.mail.Multipart;
30 import com.android.emailcommon.mail.Part;
31 
32 import org.apache.commons.io.IOUtils;
33 import org.apache.james.mime4j.codec.EncoderUtil;
34 import org.apache.james.mime4j.decoder.DecoderUtil;
35 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
36 import org.apache.james.mime4j.util.CharsetUtil;
37 
38 import java.io.ByteArrayOutputStream;
39 import java.io.IOException;
40 import java.io.InputStream;
41 import java.io.OutputStream;
42 import java.util.ArrayList;
43 import java.util.regex.Matcher;
44 import java.util.regex.Pattern;
45 
46 public class MimeUtility {
47 
48     public static final String MIME_TYPE_RFC822 = "message/rfc822";
49     private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
50 
51     /**
52      * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
53      * object whenever possible.
54      */
unfold(String s)55     public static String unfold(String s) {
56         if (s == null) {
57             return null;
58         }
59         Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
60         if (patternMatcher.find()) {
61             patternMatcher.reset();
62             s = patternMatcher.replaceAll("");
63         }
64         return s;
65     }
66 
decode(String s)67     public static String decode(String s) {
68         if (s == null) {
69             return null;
70         }
71         return DecoderUtil.decodeEncodedWords(s);
72     }
73 
unfoldAndDecode(String s)74     public static String unfoldAndDecode(String s) {
75         return decode(unfold(s));
76     }
77 
78     // TODO implement proper foldAndEncode
79     // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
80     // duplication of encoding.
foldAndEncode(String s)81     public static String foldAndEncode(String s) {
82         return s;
83     }
84 
85     /**
86      * INTERIM version of foldAndEncode that will be used only by Subject: headers.
87      * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
88      * to other headers.
89      *
90      * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
91      *
92      * @param s original string to encode and fold
93      * @param usedCharacters number of characters already used up by header name
94 
95      * @return the String ready to be transmitted
96      */
foldAndEncode2(String s, int usedCharacters)97     public static String foldAndEncode2(String s, int usedCharacters) {
98         // james.mime4j.codec.EncoderUtil.java
99         // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
100         // Usage.TEXT_TOKENlooks like the right thing for subjects
101         // use WORD_ENTITY for address/names
102 
103         String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
104                 usedCharacters);
105 
106         return fold(encoded, usedCharacters);
107     }
108 
109     /**
110      * INTERIM:  From newer version of org.apache.james (but we don't want to import
111      * the entire MimeUtil class).
112      *
113      * Splits the specified string into a multiple-line representation with
114      * lines no longer than 76 characters (because the line might contain
115      * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
116      * 2047</a> section 2). If the string contains non-whitespace sequences
117      * longer than 76 characters a line break is inserted at the whitespace
118      * character following the sequence resulting in a line longer than 76
119      * characters.
120      *
121      * @param s
122      *            string to split.
123      * @param usedCharacters
124      *            number of characters already used up. Usually the number of
125      *            characters for header field name plus colon and one space.
126      * @return a multiple-line representation of the given string.
127      */
fold(String s, int usedCharacters)128     public static String fold(String s, int usedCharacters) {
129         final int maxCharacters = 76;
130 
131         final int length = s.length();
132         if (usedCharacters + length <= maxCharacters)
133             return s;
134 
135         StringBuilder sb = new StringBuilder();
136 
137         int lastLineBreak = -usedCharacters;
138         int wspIdx = indexOfWsp(s, 0);
139         while (true) {
140             if (wspIdx == length) {
141                 sb.append(s.substring(Math.max(0, lastLineBreak)));
142                 return sb.toString();
143             }
144 
145             int nextWspIdx = indexOfWsp(s, wspIdx + 1);
146 
147             if (nextWspIdx - lastLineBreak > maxCharacters) {
148                 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
149                 sb.append("\r\n");
150                 lastLineBreak = wspIdx;
151             }
152 
153             wspIdx = nextWspIdx;
154         }
155     }
156 
157     /**
158      * INTERIM:  From newer version of org.apache.james (but we don't want to import
159      * the entire MimeUtil class).
160      *
161      * Search for whitespace.
162      */
indexOfWsp(String s, int fromIndex)163     private static int indexOfWsp(String s, int fromIndex) {
164         final int len = s.length();
165         for (int index = fromIndex; index < len; index++) {
166             char c = s.charAt(index);
167             if (c == ' ' || c == '\t')
168                 return index;
169         }
170         return len;
171     }
172 
173     /**
174      * Returns the named parameter of a header field. If name is null the first
175      * parameter is returned, or if there are no additional parameters in the
176      * field the entire field is returned. Otherwise the named parameter is
177      * searched for in a case insensitive fashion and returned. If the parameter
178      * cannot be found the method returns null.
179      *
180      * TODO: quite inefficient with the inner trimming & splitting.
181      * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
182      * TODO: The doc says that for a null name you get the first param, but you get the header.
183      *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
184      * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
185      *       ('+' -> ' ' conversion too? check RFC)
186      *
187      * @param header
188      * @param name
189      * @return the entire header (if name=null), the found parameter, or null
190      */
getHeaderParameter(String header, String name)191     public static String getHeaderParameter(String header, String name) {
192         if (header == null) {
193             return null;
194         }
195         String[] parts = unfold(header).split(";");
196         if (name == null) {
197             return parts[0].trim();
198         }
199         String lowerCaseName = name.toLowerCase();
200         for (String part : parts) {
201             if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
202                 String[] parameterParts = part.split("=", 2);
203                 if (parameterParts.length < 2) {
204                     return null;
205                 }
206                 String parameter = parameterParts[1].trim();
207                 if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
208                     return parameter.substring(1, parameter.length() - 1);
209                 } else {
210                     return parameter;
211                 }
212             }
213         }
214         return null;
215     }
216 
findFirstPartByMimeType(Part part, String mimeType)217     public static Part findFirstPartByMimeType(Part part, String mimeType)
218             throws MessagingException {
219         if (part.getBody() instanceof Multipart) {
220             Multipart multipart = (Multipart)part.getBody();
221             for (int i = 0, count = multipart.getCount(); i < count; i++) {
222                 BodyPart bodyPart = multipart.getBodyPart(i);
223                 Part ret = findFirstPartByMimeType(bodyPart, mimeType);
224                 if (ret != null) {
225                     return ret;
226                 }
227             }
228         }
229         else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
230             return part;
231         }
232         return null;
233     }
234 
findPartByContentId(Part part, String contentId)235     public static Part findPartByContentId(Part part, String contentId) throws Exception {
236         if (part.getBody() instanceof Multipart) {
237             Multipart multipart = (Multipart)part.getBody();
238             for (int i = 0, count = multipart.getCount(); i < count; i++) {
239                 BodyPart bodyPart = multipart.getBodyPart(i);
240                 Part ret = findPartByContentId(bodyPart, contentId);
241                 if (ret != null) {
242                     return ret;
243                 }
244             }
245         }
246         String cid = part.getContentId();
247         if (contentId.equals(cid)) {
248             return part;
249         }
250         return null;
251     }
252 
253     /**
254      * Reads the Part's body and returns a String based on any charset conversion that needed
255      * to be done.
256      * @param part The part containing a body
257      * @return a String containing the converted text in the body, or null if there was no text
258      * or an error during conversion.
259      */
getTextFromPart(Part part)260     public static String getTextFromPart(Part part) {
261         try {
262             if (part != null && part.getBody() != null) {
263                 InputStream in = part.getBody().getInputStream();
264                 String mimeType = part.getMimeType();
265                 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
266                     /*
267                      * Now we read the part into a buffer for further processing. Because
268                      * the stream is now wrapped we'll remove any transfer encoding at this point.
269                      */
270                     ByteArrayOutputStream out = new ByteArrayOutputStream();
271                     IOUtils.copy(in, out);
272                     in.close();
273                     in = null;      // we want all of our memory back, and close might not release
274 
275                     /*
276                      * We've got a text part, so let's see if it needs to be processed further.
277                      */
278                     String charset = getHeaderParameter(part.getContentType(), "charset");
279                     if (charset != null) {
280                         /*
281                          * See if there is conversion from the MIME charset to the Java one.
282                          */
283                         charset = CharsetUtil.toJavaCharset(charset);
284                     }
285                     /*
286                      * No encoding, so use us-ascii, which is the standard.
287                      */
288                     if (charset == null) {
289                         charset = "ASCII";
290                     }
291                     /*
292                      * Convert and return as new String
293                      */
294                     String result = out.toString(charset);
295                     out.close();
296                     return result;
297                 }
298             }
299 
300         }
301         catch (OutOfMemoryError oom) {
302             /*
303              * If we are not able to process the body there's nothing we can do about it. Return
304              * null and let the upper layers handle the missing content.
305              */
306             Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
307         }
308         catch (Exception e) {
309             /*
310              * If we are not able to process the body there's nothing we can do about it. Return
311              * null and let the upper layers handle the missing content.
312              */
313             Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + e.toString());
314         }
315         return null;
316     }
317 
318     /**
319      * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
320      * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
321      *
322      * @param mimeType A MIME type to check.
323      * @param matchAgainst A MIME type to check against. May include wildcards.
324      * @return true if the mimeType matches
325      */
mimeTypeMatches(String mimeType, String matchAgainst)326     public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
327         Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
328                 Pattern.CASE_INSENSITIVE);
329         return p.matcher(mimeType).matches();
330     }
331 
332     /**
333      * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
334      * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
335      * (e.g. "image/*").
336      *
337      * @param mimeType A MIME type to check.
338      * @param matchAgainst An array of MIME types to check against. May include wildcards.
339      * @return true if the mimeType matches any of the matchAgainst strings
340      */
mimeTypeMatches(String mimeType, String[] matchAgainst)341     public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
342         for (String matchType : matchAgainst) {
343             if (mimeTypeMatches(mimeType, matchType)) {
344                 return true;
345             }
346         }
347         return false;
348     }
349 
350     /**
351      * Given an input stream and a transfer encoding, return a wrapped input stream for that
352      * encoding (or the original if none is required)
353      * @param in the input stream
354      * @param contentTransferEncoding the content transfer encoding
355      * @return a properly wrapped stream
356      */
getInputStreamForContentTransferEncoding(InputStream in, String contentTransferEncoding)357     public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
358             String contentTransferEncoding) {
359         if (contentTransferEncoding != null) {
360             contentTransferEncoding =
361                 MimeUtility.getHeaderParameter(contentTransferEncoding, null);
362             if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
363                 in = new QuotedPrintableInputStream(in);
364             }
365             else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
366                 in = new Base64InputStream(in, Base64.DEFAULT);
367             }
368         }
369         return in;
370     }
371 
372     /**
373      * Removes any content transfer encoding from the stream and returns a Body.
374      */
decodeBody(InputStream in, String contentTransferEncoding)375     public static Body decodeBody(InputStream in, String contentTransferEncoding)
376             throws IOException {
377         /*
378          * We'll remove any transfer encoding by wrapping the stream.
379          */
380         in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
381         BinaryTempFileBody tempBody = new BinaryTempFileBody();
382         OutputStream out = tempBody.getOutputStream();
383         try {
384             IOUtils.copy(in, out);
385         } catch (Base64DataException bde) {
386             // TODO Need to fix this somehow
387             //String warning = "\n\n" + Email.getMessageDecodeErrorString();
388             //out.write(warning.getBytes());
389         } finally {
390             out.close();
391         }
392         return tempBody;
393     }
394 
395     /**
396      * Recursively scan a Part (usually a Message) and sort out which of its children will be
397      * "viewable" and which will be attachments.
398      *
399      * @param part The part to be broken down
400      * @param viewables This arraylist will be populated with all parts that appear to be
401      * the "message" (e.g. text/plain & text/html)
402      * @param attachments This arraylist will be populated with all parts that appear to be
403      * attachments (including inlines)
404      * @throws MessagingException
405      */
collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)406     public static void collectParts(Part part, ArrayList<Part> viewables,
407             ArrayList<Part> attachments) throws MessagingException {
408         String disposition = part.getDisposition();
409         String dispositionType = null;
410         String dispositionFilename = null;
411         if (disposition != null) {
412             dispositionType = MimeUtility.getHeaderParameter(disposition, null);
413             dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
414         }
415         // An attachment filename can be defined in either the Content-Disposition header
416         // or the Content-Type header. Content-Disposition is preferred, so we only try
417         // the Content-Type header as a last resort.
418         if (dispositionFilename == null) {
419             String contentType = part.getContentType();
420             dispositionFilename = MimeUtility.getHeaderParameter(contentType, "name");
421         }
422         boolean attachmentDisposition = "attachment".equalsIgnoreCase(dispositionType);
423         // If a disposition is not specified, default to "inline"
424         boolean inlineDisposition = dispositionType == null
425                 || "inline".equalsIgnoreCase(dispositionType);
426 
427         // A guess that this part is intended to be an attachment
428         boolean attachment = attachmentDisposition
429                 || (dispositionFilename != null && !inlineDisposition);
430 
431         // A guess that this part is intended to be an inline.
432         boolean inline = inlineDisposition && (dispositionFilename != null);
433 
434         // One or the other
435         boolean attachmentOrInline = attachment || inline;
436 
437         if (part.getBody() instanceof Multipart) {
438             // If the part is Multipart but not alternative it's either mixed or
439             // something we don't know about, which means we treat it as mixed
440             // per the spec. We just process its pieces recursively.
441             MimeMultipart mp = (MimeMultipart)part.getBody();
442             boolean foundHtml = false;
443             if (mp.getSubTypeForTest().equals("alternative")) {
444                 for (int i = 0; i < mp.getCount(); i++) {
445                     if (mp.getBodyPart(i).isMimeType("text/html")) {
446                         foundHtml = true;
447                         break;
448                     }
449                 }
450             }
451             for (int i = 0; i < mp.getCount(); i++) {
452                 // See if we have text and html
453                 BodyPart bp = mp.getBodyPart(i);
454                 // If there's html, don't bother loading text
455                 if (foundHtml && bp.isMimeType("text/plain")) {
456                     continue;
457                 }
458                 collectParts(bp, viewables, attachments);
459             }
460         } else if (part.getBody() instanceof Message) {
461             // If the part is an embedded message we just continue to process
462             // it, pulling any viewables or attachments into the running list.
463             Message message = (Message)part.getBody();
464             collectParts(message, viewables, attachments);
465         } else if ((!attachmentOrInline) && ("text/html".equalsIgnoreCase(part.getMimeType()))) {
466             // If the part is HTML and we got this far, it's a viewable part of a mixed
467             viewables.add(part);
468         } else if ((!attachmentOrInline) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) {
469             // If the part is text and we got this far, it's a viewable part of a mixed
470             viewables.add(part);
471         } else if (attachmentOrInline) {
472             // Finally, if it's an attachment or an inline we will include it as an attachment.
473             attachments.add(part);
474         }
475     }
476 }
477